# HG changeset patch
# User bgruening
# Date 1557872112 14400
# Node ID e94395c672bd0894e88af9a45cd799734adab020
# Parent 39ae276e75d930f2e123277afc39396fca19b69f
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
diff -r 39ae276e75d9 -r e94395c672bd ensemble.xml
--- a/ensemble.xml Sun Dec 30 01:56:11 2018 -0500
+++ b/ensemble.xml Tue May 14 18:15:12 2019 -0400
@@ -14,17 +14,18 @@
0")
+
+ support_ = np.ones(n_features, dtype=np.bool)
+ ranking_ = np.ones(n_features, dtype=np.int)
+
+ if step_score:
+ self.scores_ = []
+
+ step_i = 0
+ # Elimination
+ while np.sum(support_) > n_features_to_select and step_i < len(step):
+
+ # if last step is 1, will keep loop
+ if step_i == len(step) - 1 and step[step_i] != 0:
+ step.append(step[step_i])
+
+ # Remaining features
+ features = np.arange(n_features)[support_]
+
+ # Rank the remaining features
+ estimator = clone(self.estimator)
+ if self.verbose > 0:
+ print("Fitting estimator with %d features." % np.sum(support_))
+
+ estimator.fit(X[:, features], y)
+
+ # Get coefs
+ if hasattr(estimator, 'coef_'):
+ coefs = estimator.coef_
+ else:
+ coefs = getattr(estimator, 'feature_importances_', None)
+ if coefs is None:
+ raise RuntimeError('The classifier does not expose '
+ '"coef_" or "feature_importances_" '
+ 'attributes')
+
+ # Get ranks
+ if coefs.ndim > 1:
+ ranks = np.argsort(safe_sqr(coefs).sum(axis=0))
+ else:
+ ranks = np.argsort(safe_sqr(coefs))
+
+ # for sparse case ranks is matrix
+ ranks = np.ravel(ranks)
+
+ # Eliminate the worse features
+ threshold =\
+ min(step[step_i], np.sum(support_) - n_features_to_select)
+
+ # Compute step score on the previous selection iteration
+ # because 'estimator' must use features
+ # that have not been eliminated yet
+ if step_score:
+ self.scores_.append(step_score(estimator, features))
+ support_[features[ranks][:threshold]] = False
+ ranking_[np.logical_not(support_)] += 1
+
+ step_i += 1
+
+ # Set final attributes
+ features = np.arange(n_features)[support_]
+ self.estimator_ = clone(self.estimator)
+ self.estimator_.fit(X[:, features], y)
+
+ # Compute step score when only n_features_to_select features left
+ if step_score:
+ self.scores_.append(step_score(self.estimator_, features))
+ self.n_features_ = support_.sum()
+ self.support_ = support_
+ self.ranking_ = ranking_
+
+ return self
+
+
+class DyRFECV(RFECV, MetaEstimatorMixin):
+ """
+ Compared with RFECV, DyRFECV offers a flexible `step` to eliminate
+ features, given as a list, while RFECV supports only a fixed `step`
+ value.
+
+ Parameters
+ ----------
+ estimator : object
+ A supervised learning estimator with a ``fit`` method that provides
+ information about feature importance either through a ``coef_``
+ attribute or through a ``feature_importances_`` attribute.
+ step : int, float or list, optional (default=1)
+ If greater than or equal to 1, then ``step`` corresponds to the
+ (integer) number of features to remove at each iteration.
+ If within (0.0, 1.0), then ``step`` corresponds to the percentage
+ (rounded down) of features to remove at each iteration.
+ If list, a series of steps to remove, one per iteration. Iteration stops
+ when all steps are finished.
+ NOTE(review): the visible DyRFE loop re-appends the last non-zero step,
+ so elimination appears to continue past the end of the list - confirm.
+ Note that the last iteration may remove fewer than ``step`` features in
+ order to reach ``min_features_to_select``.
+ min_features_to_select : int, (default=1)
+ The minimum number of features to be selected. This number of features
+ will always be scored, even if the difference between the original
+ feature count and ``min_features_to_select`` isn't divisible by
+ ``step``.
+ cv : int, cross-validation generator or an iterable, optional
+ Determines the cross-validation splitting strategy.
+ Possible inputs for cv are:
+ - None, to use the default 3-fold cross-validation,
+ - integer, to specify the number of folds.
+ - :term:`CV splitter`,
+ - An iterable yielding (train, test) splits as arrays of indices.
+ For integer/None inputs, if ``y`` is binary or multiclass,
+ :class:`sklearn.model_selection.StratifiedKFold` is used. If the
+ estimator is a classifier or if ``y`` is neither binary nor multiclass,
+ :class:`sklearn.model_selection.KFold` is used.
+ Refer :ref:`User Guide <cross_validation>` for the various
+ cross-validation strategies that can be used here.
+ .. versionchanged:: 0.20
+ ``cv`` default value of None will change from 3-fold to 5-fold
+ in v0.22.
+ scoring : string, callable or None, optional, (default=None)
+ A string (see model evaluation documentation) or
+ a scorer callable object / function with signature
+ ``scorer(estimator, X, y)``.
+ verbose : int, (default=0)
+ Controls verbosity of output.
+ n_jobs : int or None, optional (default=None)
+ Number of cores to run in parallel while fitting across folds.
+ ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+ ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+ for more details.
+ """
+ def __init__(self, estimator, step=1, min_features_to_select=1, cv='warn',
+ scoring=None, verbose=0, n_jobs=None):
+ super(DyRFECV, self).__init__(
+ estimator, step=step,
+ min_features_to_select=min_features_to_select,
+ cv=cv, scoring=scoring, verbose=verbose,
+ n_jobs=n_jobs)
+
+ def fit(self, X, y, groups=None):
+ """Fit the RFE model and automatically tune the number of selected
+ features.
+ Parameters
+ ----------
+ X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+ Training vector, where `n_samples` is the number of samples and
+ `n_features` is the total number of features.
+ y : array-like, shape = [n_samples]
+ Target values (integers for classification, real numbers for
+ regression).
+ groups : array-like, shape = [n_samples], optional
+ Group labels for the samples used while splitting the dataset into
+ train/test set.
+ Returns
+ -------
+ self, with support_, n_features_, ranking_, estimator_ and
+ grid_scores_ set (list path), or whatever RFECV.fit returns otherwise.
+ """
+ # Anything that is not exactly a list (int, float, tuple, list
+ # subclass, ...) is delegated to the stock RFECV implementation.
+ if type(self.step) is not list:
+ return super(DyRFECV, self).fit(X, y, groups)
+
+ X, y = check_X_y(X, y, "csr")
+
+ # Initialization
+ cv = check_cv(self.cv, y, is_classifier(self.estimator))
+ scorer = check_scoring(self.estimator, scoring=self.scoring)
+ n_features = X.shape[1]
+
+ # Resolve every list entry to an absolute feature count: fractions in
+ # (0, 1) are relative to n_features and remove at least one feature.
+ # This local list is only used further down to map the best score index
+ # back to a feature count; DyRFE itself is given the raw self.step.
+ step = []
+ for s in self.step:
+ if 0.0 < s < 1.0:
+ step.append(int(max(1, s * n_features)))
+ else:
+ step.append(int(s))
+ if s <= 0:
+ raise ValueError("Step must be >0")
+
+ # Build an RFE object, which will evaluate and score each possible
+ # feature count, down to self.min_features_to_select
+ rfe = DyRFE(estimator=self.estimator,
+ n_features_to_select=self.min_features_to_select,
+ step=self.step, verbose=self.verbose)
+
+ # Determine the number of subsets of features by fitting across
+ # the train folds and choosing the "features_to_select" parameter
+ # that gives the least averaged error across all folds.
+
+ # Note that joblib raises a non-picklable error for bound methods
+ # even if n_jobs is set to 1 with the default multiprocessing
+ # backend.
+ # This branching is done to
+ # make sure that user code that sets n_jobs to 1
+ # and provides bound methods as scorers is not broken with the
+ # addition of n_jobs parameter in version 0.18.
+
+ if effective_n_jobs(self.n_jobs) == 1:
+ parallel, func = list, _rfe_single_fit
+ else:
+ parallel = Parallel(n_jobs=self.n_jobs)
+ func = delayed(_rfe_single_fit)
+
+ scores = parallel(
+ func(rfe, self.estimator, X, y, train, test, scorer)
+ for train, test in cv.split(X, y, groups))
+
+ # Sum the per-fold score vectors element-wise.
+ scores = np.sum(scores, axis=0)
+ # More scores than resolved steps: pad with copies of the last step so
+ # the cumulative sum below covers every scored iteration.
+ diff = int(scores.shape[0]) - len(step)
+ if diff > 0:
+ step = np.r_[step, [step[-1]] * diff]
+ # argmax on the reversed vector picks the LAST maximum of `scores`.
+ scores_rev = scores[::-1]
+ argmax_idx = len(scores) - np.argmax(scores_rev) - 1
+ n_features_to_select = max(
+ n_features - sum(step[:argmax_idx]),
+ self.min_features_to_select)
+
+ # Re-execute an elimination with best_k over the whole set
+ rfe = DyRFE(estimator=self.estimator,
+ n_features_to_select=n_features_to_select, step=self.step,
+ verbose=self.verbose)
+
+ rfe.fit(X, y)
+
+ # Set final attributes
+ self.support_ = rfe.support_
+ self.n_features_ = rfe.n_features_
+ self.ranking_ = rfe.ranking_
+ self.estimator_ = clone(self.estimator)
+ self.estimator_.fit(self.transform(X), y)
+
+ # Fixing a normalization error, n is equal to get_n_splits(X, y) - 1
+ # here, the scores are normalized by get_n_splits(X, y)
+ self.grid_scores_ = scores[::-1] / cv.get_n_splits(X, y, groups)
+ return self
+
+
+class MyPipeline(pipeline.Pipeline):
+ """
+ Extend pipeline object to have feature_importances_ attribute
+ """
+ def fit(self, X, y=None, **fit_params):
+ """
+ Fit the pipeline, then expose the final step's ``coef_`` (preferred)
+ or ``feature_importances_`` as ``self.feature_importances_``.
+ Raises RuntimeError if the final step exposes neither attribute.
+ Returns self.
+ """
+ super(MyPipeline, self).fit(X, y, **fit_params)
+ # last (name, estimator) pair of the pipeline -> the estimator object
+ estimator = self.steps[-1][-1]
+ if hasattr(estimator, 'coef_'):
+ coefs = estimator.coef_
+ else:
+ coefs = getattr(estimator, 'feature_importances_', None)
+ # NOTE(review): indentation was lost in this patch text, so it is not
+ # visible whether this check also covers a `coef_` that is None.
+ if coefs is None:
+ raise RuntimeError('The estimator in the pipeline does not expose '
+ '"coef_" or "feature_importances_" '
+ 'attributes')
+ self.feature_importances_ = coefs
+ return self
+
+
+class MyimbPipeline(imbPipeline):
+ """
+ Extend imbalance pipeline object to have feature_importances_ attribute
+ """
+ def fit(self, X, y=None, **fit_params):
+ """
+ Fit the pipeline, then expose the final step's ``coef_`` (preferred)
+ or ``feature_importances_`` as ``self.feature_importances_``.
+ Raises RuntimeError if the final step exposes neither attribute.
+ Returns self.
+ """
+ super(MyimbPipeline, self).fit(X, y, **fit_params)
+ # last (name, estimator) pair of the pipeline -> the estimator object
+ estimator = self.steps[-1][-1]
+ if hasattr(estimator, 'coef_'):
+ coefs = estimator.coef_
+ else:
+ coefs = getattr(estimator, 'feature_importances_', None)
+ # NOTE(review): indentation was lost in this patch text, so it is not
+ # visible whether this check also covers a `coef_` that is None.
+ if coefs is None:
+ raise RuntimeError('The estimator in the pipeline does not expose '
+ '"coef_" or "feature_importances_" '
+ 'attributes')
+ self.feature_importances_ = coefs
+ return self
+
+
+def check_feature_importances(estimator):
+ """
+ For pipeline object which has no feature_importances_ property,
+ this function returns the same configured pipeline object with
+ attached the last estimator's feature_importances_.
+
+ Objects whose class lives in `sklearn.pipeline` are rebuilt as
+ MyPipeline, those from `imblearn.pipeline` as MyimbPipeline (both from
+ the `steps` parameter); any other estimator is returned unchanged.
+ """
+ # NOTE(review): in the text visible here the two pipeline branches rebuild
+ # `estimator` but no `return` follows them, so the function would return
+ # None for pipelines. The block is cut off by the next diff header, so the
+ # remainder may simply be missing from this patch text - confirm against
+ # the real source before relying on the return value.
+ if estimator.__class__.__module__ == 'sklearn.pipeline':
+ pipeline_steps = estimator.get_params()['steps']
+ estimator = MyPipeline(pipeline_steps)
+ elif estimator.__class__.__module__ == 'imblearn.pipeline':
+ pipeline_steps = estimator.get_params()['steps']
+ estimator = MyimbPipeline(pipeline_steps)
+ else:
+ return estimator
diff -r 39ae276e75d9 -r e94395c672bd iraps_classifier.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/iraps_classifier.py Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,569 @@
+"""
+class IRAPSCore
+class IRAPSClassifier
+class BinarizeTargetClassifier
+class BinarizeTargetRegressor
+class _BinarizeTargetScorer
+class _BinarizeTargetProbaScorer
+
+binarize_auc_scorer
+binarize_average_precision_scorer
+
+binarize_accuracy_scorer
+binarize_balanced_accuracy_scorer
+binarize_precision_scorer
+binarize_recall_scorer
+"""
+
+
+import numpy as np
+import random
+import warnings
+
+from abc import ABCMeta
+from scipy.stats import ttest_ind
+from sklearn import metrics
+from sklearn.base import BaseEstimator, clone, RegressorMixin
+from sklearn.externals import six
+from sklearn.feature_selection.univariate_selection import _BaseFilter
+from sklearn.metrics.scorer import _BaseScorer
+from sklearn.pipeline import Pipeline
+from sklearn.utils import as_float_array, check_X_y
+from sklearn.utils._joblib import Parallel, delayed
+from sklearn.utils.validation import (check_array, check_is_fitted,
+ check_memory, column_or_1d)
+
+
+VERSION = '0.1.1'
+
+
+class IRAPSCore(six.with_metaclass(ABCMeta, BaseEstimator)):
+ """
+ Sampling core wrapped by IRAPSClassifier (its `iraps_core` argument)
+ From sklearn BaseEstimator:
+ get_params()
+ set_params()
+
+ Parameters
+ ----------
+ n_iter : int
+ number of stochastic sampling iterations run by `fit`
+
+ positive_thres : float
+ z_score threshold to discretize positive target values
+
+ negative_thres : float
+ z_score threshold to discretize negative target values
+
+ verbose : int
+ 0 or greater, if not 0, print progress
+
+ n_jobs : int, default=1
+ The number of CPUs to use to do the computation.
+
+ pre_dispatch : int, or string.
+ Controls the number of jobs that get dispatched during parallel
+ execution. Reducing this number can be useful to avoid an
+ explosion of memory consumption when more jobs get dispatched
+ than CPUs can process. This parameter can be:
+ - None, in which case all the jobs are immediately
+ created and spawned. Use this for lightweight and
+ fast-running jobs, to avoid delays due to on-demand
+ spawning of the jobs
+ - An int, giving the exact number of total jobs that are
+ spawned
+ - A string, giving an expression as a function of n_jobs,
+ as in '2*n_jobs'
+ random_state : int or None
+ If int, sampling iteration i is seeded with ``random_state + i``.
+ """
+
+ def __init__(self, n_iter=1000, positive_thres=-1, negative_thres=0,
+ verbose=0, n_jobs=1, pre_dispatch='2*n_jobs',
+ random_state=None):
+ """
+ IRAPS turns towards general Anomaly Detection
+ It compares positive_thres with negative_thres,
+ and decides which portion is the positive target.
+ e.g.:
+ (positive_thres=-1, negative_thres=0)
+ => positive = Z_score of target < -1
+ (positive_thres=1, negative_thres=0)
+ => positive = Z_score of target > 1
+
+ Note: The positive targets here are always the
+ abnormal minority group.
+ """
+ self.n_iter = n_iter
+ self.positive_thres = positive_thres
+ self.negative_thres = negative_thres
+ self.verbose = verbose
+ self.n_jobs = n_jobs
+ self.pre_dispatch = pre_dispatch
+ self.random_state = random_state
+
+ def fit(self, X, y):
+ """Run `n_iter` sampling rounds and stack their per-feature results.
+ X: array-like (n_samples x n_features); y: 1-d array-like (n_samples)
+ Sets pvalues_, fold_changes_, base_values_; raises ValueError when
+ fewer than 50 rounds yield a result. Returns self."""
+ X, y = check_X_y(X, y, ['csr', 'csc'], multi_output=False)
+
+ def _stochastic_sampling(X, y, random_state=None, positive_thres=-1,
+ negative_thres=0):
+ # each iteration selects a random number of random subset of
+ # training samples. this is somewhat different from the original
+ # IRAPS method, but effect is almost the same.
+ SAMPLE_SIZE = [0.25, 0.75]
+ n_samples = X.shape[0]
+ # NOTE(review): seeded branch creates Random(random_state) twice (same seed for both draws)
+ if random_state is None:
+ n_select = random.randint(int(n_samples * SAMPLE_SIZE[0]),
+ int(n_samples * SAMPLE_SIZE[1]))
+ index = random.sample(list(range(n_samples)), n_select)
+ else:
+ n_select = random.Random(random_state).randint(
+ int(n_samples * SAMPLE_SIZE[0]),
+ int(n_samples * SAMPLE_SIZE[1]))
+ index = random.Random(random_state).sample(
+ list(range(n_samples)), n_select)
+
+ X_selected, y_selected = X[index], y[index]
+ # NOTE(review): a constant y_selected gives std() == 0 below - confirm
+ # Splitting by z_scores.
+ y_selected = (y_selected - y_selected.mean()) / y_selected.std()
+ if positive_thres < negative_thres:
+ X_selected_positive = X_selected[y_selected < positive_thres]
+ X_selected_negative = X_selected[y_selected > negative_thres]
+ else:
+ X_selected_positive = X_selected[y_selected > positive_thres]
+ X_selected_negative = X_selected[y_selected < negative_thres]
+
+ # Skip (warn and return None) rounds with fewer than 5 positives
+ if X_selected_positive.shape[0] < 5:
+ warnings.warn("Warning: fewer than 5 positives were selected!")
+ return
+
+ # per-feature p_values from the unequal-variance t-test
+ _, p = ttest_ind(X_selected_positive, X_selected_negative,
+ axis=0, equal_var=False)
+
+ # fold_change == mean change?
+ # TODO implement other normalization method
+ positive_mean = X_selected_positive.mean(axis=0)
+ negative_mean = X_selected_negative.mean(axis=0)
+ mean_change = positive_mean - negative_mean
+ # mean_change = np.select(
+ # [positive_mean > negative_mean,
+ # positive_mean < negative_mean],
+ # [positive_mean / negative_mean,
+ # -negative_mean / positive_mean])
+ # mean_change could be adjusted by power of 2
+ # mean_change = 2**mean_change \
+ # if mean_change>0 else -2**abs(mean_change)
+
+ return p, mean_change, negative_mean
+
+ parallel = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
+ pre_dispatch=self.pre_dispatch)
+ if self.random_state is None:
+ res = parallel(delayed(_stochastic_sampling)(
+ X, y, random_state=None,
+ positive_thres=self.positive_thres,
+ negative_thres=self.negative_thres)
+ for i in range(self.n_iter))
+ else:
+ res = parallel(delayed(_stochastic_sampling)(
+ X, y, random_state=seed,
+ positive_thres=self.positive_thres,
+ negative_thres=self.negative_thres)
+ for seed in range(self.random_state,
+ self.random_state+self.n_iter))
+ res = [_ for _ in res if _]  # drop skipped (None) rounds
+ if len(res) < 50:
+ raise ValueError("too few (%d) valid feature lists "
+ "were generated!" % len(res))
+ pvalues = np.vstack([x[0] for x in res])
+ fold_changes = np.vstack([x[1] for x in res])
+ base_values = np.vstack([x[2] for x in res])
+ # row i of each stacked array comes from the i-th retained round
+ self.pvalues_ = np.asarray(pvalues)
+ self.fold_changes_ = np.asarray(fold_changes)
+ self.base_values_ = np.asarray(base_values)
+
+ return self
+
+
+def _iraps_core_fit(iraps_core, X, y):  # wrapped by memory.cache() in IRAPSClassifier.fit
+ return iraps_core.fit(X, y)
+
+
+class IRAPSClassifier(six.with_metaclass(ABCMeta, _BaseFilter,
+ BaseEstimator, RegressorMixin)):
+ """
+ Extend the bases of both sklearn feature_selector and classifier.
+ From sklearn BaseEstimator:
+ get_params()
+ set_params()
+ From sklearn _BaseFilter:
+ get_support()
+ fit_transform(X)
+ transform(X)
+ From sklearn RegressorMixin:
+ score(X, y): R2
+ New:
+ predict(X)
+ predict_label(X)
+ get_signature()
+ Properties:
+ discretize_value, less_is_positive (both set by fit)
+
+ Parameters
+ ----------
+ iraps_core: object exposing pvalues_, fold_changes_, base_values_ after fit
+ p_thres: float, threshold for p_values
+ fc_thres: float, threshold for fold change or mean difference
+ occurrence: float, occurrence rate selected by set of p_thres and fc_thres
+ discretize: float, threshold of z_score to discretize target value
+ memory: None, str or joblib.Memory object
+ min_signature_features: int, the minimum number of features in a signature
+ """
+
+ def __init__(self, iraps_core, p_thres=1e-4, fc_thres=0.1,
+ occurrence=0.8, discretize=-1, memory=None,
+ min_signature_features=1):
+ self.iraps_core = iraps_core
+ self.p_thres = p_thres
+ self.fc_thres = fc_thres
+ self.occurrence = occurrence
+ self.discretize = discretize
+ self.memory = memory
+ self.min_signature_features = min_signature_features
+ # fit: fit (or reuse from cache) the core, then derive mask_ and signature_
+ def fit(self, X, y):
+ memory = check_memory(self.memory)
+ cached_fit = memory.cache(_iraps_core_fit)
+ iraps_core = clone(self.iraps_core)
+ # allow pre-fitted iraps_core here (NOTE(review): checked on the clone - confirm it can be fitted)
+ if not hasattr(iraps_core, 'pvalues_'):
+ iraps_core = cached_fit(iraps_core, X, y)
+ self.iraps_core_ = iraps_core
+
+ pvalues = as_float_array(iraps_core.pvalues_, copy=True)
+ # why np.nan is here? NaN p-values become the largest float, so they never pass p_thres
+ pvalues[np.isnan(pvalues)] = np.finfo(pvalues.dtype).max
+
+ fold_changes = as_float_array(iraps_core.fold_changes_, copy=True)
+ fold_changes[np.isnan(fold_changes)] = 0.0
+
+ base_values = as_float_array(iraps_core.base_values_, copy=True)
+
+ p_thres = self.p_thres
+ fc_thres = self.fc_thres
+ occurrence = self.occurrence
+ # mask_0[i, j] == 1 when feature j passes both thresholds in round i
+ mask_0 = np.zeros(pvalues.shape, dtype=np.int32)
+ # mark p_values less than or equal to the threshold
+ mask_0[pvalues <= p_thres] = 1
+ # unmark entries whose absolute fold change is below the threshold
+ mask_0[abs(fold_changes) < fc_thres] = 0
+
+ # count the occurrence and mask those reaching the threshold
+ counts = mask_0.sum(axis=0)
+ occurrence_thres = int(occurrence * iraps_core.n_iter)
+ mask = np.zeros(counts.shape, dtype=bool)
+ mask[counts >= occurrence_thres] = 1
+ # signature row 0: summed fold change / count per selected feature; row 1: mean base value
+ # generate signature
+ fold_changes[mask_0 == 0] = 0.0
+ signature = fold_changes[:, mask].sum(axis=0) / counts[mask]
+ signature = np.vstack((signature, base_values[:, mask].mean(axis=0)))
+ # It's not clear whether min_size could impact prediction performance.
+ # NOTE(review): np.vstack never returns None, so only the size test can fire.
+ if signature is None\
+ or signature.shape[1] < self.min_signature_features:
+ raise ValueError("The classifier got None signature or the number "
+ "of sinature feature is less than minimum!")
+
+ self.signature_ = np.asarray(signature)
+ self.mask_ = mask
+ # TODO: support other discretize method: fixed value, upper
+ # third quarter, etc.
+ self.discretize_value = y.mean() + y.std() * self.discretize
+ if iraps_core.negative_thres > iraps_core.positive_thres:
+ self.less_is_positive = True
+ else:
+ self.less_is_positive = False
+
+ return self
+
+ def _get_support_mask(self):
+ """
+ return mask of feature selection indices
+ """
+ check_is_fitted(self, 'mask_')
+
+ return self.mask_
+
+ def get_signature(self):
+ """
+ return signature
+ """
+ check_is_fitted(self, 'signature_')
+
+ return self.signature_
+
+ def predict(self, X):
+ """
+ compute the correlation coefficient with iraps signature
+ """
+ signature = self.get_signature()
+ # subtract the base values (signature row 1) from the selected features
+ X = as_float_array(X)
+ X_transformed = self.transform(X) - signature[1]
+ corrcoef = np.array(
+ [np.corrcoef(signature[0], e)[0][1] for e in X_transformed])
+ corrcoef[np.isnan(corrcoef)] = np.finfo(np.float32).min
+
+ return corrcoef
+ # predict_label: boolean labels, True where predict(X) >= clf_cutoff
+ def predict_label(self, X, clf_cutoff=0.4):
+ return self.predict(X) >= clf_cutoff
+
+
+class BinarizeTargetClassifier(BaseEstimator, RegressorMixin):
+    """
+    Convert continuous target to binary labels (True and False)
+    and apply a classification estimator.
+
+    Parameters
+    ----------
+    classifier: object
+        Estimator object such as derived from sklearn `ClassifierMixin`.
+
+    z_score: float, default=-1.0
+        Threshold expressed as a z_score of the training target
+        (``y.mean() + y.std() * z_score``). Ignored when `value` is set.
+
+    value: float, default=None
+        Fixed threshold value; when not None it is used as is.
+
+    less_is_positive: boolean, default=True
+        When True, targets less than the threshold are converted to True;
+        when False, targets greater than the threshold are.
+
+    Attributes
+    ----------
+    classifier_: object
+        Fitted classifier
+
+    discretize_value: float
+        The threshold value used to discretize True and False targets
+    """
+
+    def __init__(self, classifier, z_score=-1, value=None,
+                 less_is_positive=True):
+        self.classifier = classifier
+        self.z_score = z_score
+        self.value = value
+        self.less_is_positive = less_is_positive
+
+    def fit(self, X, y, sample_weight=None):
+        """
+        Convert y to True and False labels and then fit the classifier
+        with X and new y
+
+        Returns
+        -------
+        self: object
+        """
+        y = check_array(y, accept_sparse=False, force_all_finite=True,
+                        ensure_2d=False, dtype='numeric')
+        y = column_or_1d(y)
+
+        # A fixed `value` wins over the z_score rule. This branch used to
+        # read `self.Value`, raising AttributeError whenever value was set.
+        if self.value is None:
+            discretize_value = y.mean() + y.std() * self.z_score
+        else:
+            discretize_value = self.value
+        self.discretize_value = discretize_value
+
+        if self.less_is_positive:
+            y_trans = y < discretize_value
+        else:
+            y_trans = y > discretize_value
+
+        self.classifier_ = clone(self.classifier)
+
+        # sample_weight is forwarded only when it was provided
+        if sample_weight is not None:
+            self.classifier_.fit(X, y_trans, sample_weight=sample_weight)
+        else:
+            self.classifier_.fit(X, y_trans)
+
+        # mirror the fitted attributes the wrapped classifier exposes
+        for attr in ('feature_importances_', 'coef_',
+                     'n_outputs_', 'n_features_'):
+            if hasattr(self.classifier_, attr):
+                setattr(self, attr, getattr(self.classifier_, attr))
+
+        return self
+
+    def predict(self, X):
+        """
+        Return column 1 of the fitted classifier's `predict_proba(X)`.
+        """
+        check_is_fitted(self, 'classifier_')
+        proba = self.classifier_.predict_proba(X)
+        return proba[:, 1]
+
+    def predict_label(self, X):
+        """Predict class label of X with the fitted classifier's `predict`
+        """
+        check_is_fitted(self, 'classifier_')
+        return self.classifier_.predict(X)
+
+
+class _BinarizeTargetProbaScorer(_BaseScorer):
+    """
+    Scorer that binarizes `y` with the estimator's own threshold before
+    calling the wrapped score function on `clf.predict(X)`.
+    """
+
+    def __call__(self, clf, X, y, sample_weight=None):
+        # Locate the estimator that carries `discretize_value` and
+        # `less_is_positive`: the last pipeline step, the meta estimator
+        # of a stacking ensemble, or `clf` itself.
+        # TODO support nested pipeline/stacking estimators
+        kind = clf.__class__.__name__
+        if isinstance(clf, Pipeline):
+            target_holder = clf.steps[-1][-1]
+        elif kind in ['StackingCVClassifier', 'StackingClassifier']:
+            target_holder = clf.meta_clf_
+        elif kind in ['StackingCVRegressor', 'StackingRegressor']:
+            target_holder = clf.meta_regr_
+        else:
+            target_holder = clf
+
+        threshold = target_holder.discretize_value
+        if target_holder.less_is_positive:
+            y_true = y < threshold
+        else:
+            y_true = y > threshold
+
+        y_score = clf.predict(X)
+        if sample_weight is None:
+            raw = self._score_func(y_true, y_score, **self._kwargs)
+        else:
+            raw = self._score_func(y_true, y_score,
+                                   sample_weight=sample_weight,
+                                   **self._kwargs)
+        return self._sign * raw
+
+
+# ROC AUC on the binarized target (args presumably score_func, sign, kwargs)
+binarize_auc_scorer =\
+ _BinarizeTargetProbaScorer(metrics.roc_auc_score, 1, {})
+
+# average precision on the binarized target
+binarize_average_precision_scorer =\
+ _BinarizeTargetProbaScorer(metrics.average_precision_score, 1, {})
+
+# alias: same object as binarize_auc_scorer, under the IRAPS name
+iraps_auc_scorer = binarize_auc_scorer
+
+# alias: same object as binarize_average_precision_scorer
+iraps_average_precision_scorer = binarize_average_precision_scorer
+
+
+class BinarizeTargetRegressor(BaseEstimator, RegressorMixin):
+    """
+    Extend regression estimator to have discretize_value
+
+    Parameters
+    ----------
+    regressor: object
+        Estimator object such as derived from sklearn `RegressorMixin`.
+
+    z_score: float, default=-1.0
+        Threshold expressed as a z_score of the training target
+        (``y.mean() + y.std() * z_score``). Ignored when `value` is set.
+
+    value: float, default=None
+        Fixed threshold value; when not None it is used as is.
+
+    less_is_positive: boolean, default=True
+        When True, `predict` returns ``1 - y_pred`` so that lower
+        regression outputs map to higher scores.
+
+    Attributes
+    ----------
+    regressor_: object
+        Fitted regressor
+
+    discretize_value: float
+        The threshold value used to discretize True and False targets
+    """
+
+    def __init__(self, regressor, z_score=-1, value=None,
+                 less_is_positive=True):
+        self.regressor = regressor
+        self.z_score = z_score
+        self.value = value
+        self.less_is_positive = less_is_positive
+
+    def fit(self, X, y, sample_weight=None):
+        """
+        Calculate the discretize_value, fit the regressor with training data
+
+        Returns
+        -------
+        self: object
+        """
+        y = check_array(y, accept_sparse=False, force_all_finite=True,
+                        ensure_2d=False, dtype='numeric')
+        y = column_or_1d(y)
+
+        # A fixed `value` wins over the z_score rule. This branch used to
+        # read `self.Value`, raising AttributeError whenever value was set.
+        if self.value is None:
+            discretize_value = y.mean() + y.std() * self.z_score
+        else:
+            discretize_value = self.value
+        self.discretize_value = discretize_value
+
+        self.regressor_ = clone(self.regressor)
+
+        if sample_weight is not None:
+            self.regressor_.fit(X, y, sample_weight=sample_weight)
+        else:
+            self.regressor_.fit(X, y)
+
+        # mirror the fitted attributes the wrapped regressor exposes
+        for attr in ('feature_importances_', 'coef_',
+                     'n_outputs_', 'n_features_'):
+            if hasattr(self.regressor_, attr):
+                setattr(self, attr, getattr(self.regressor_, attr))
+
+        return self
+
+    def predict(self, X):
+        """Predict target value of X, min-max scaled when outside [0, 1]
+        """
+        check_is_fitted(self, 'regressor_')
+        y_pred = self.regressor_.predict(X)
+        # rescale only when some prediction lies outside [0, 1]
+        # NOTE(review): constant out-of-range predictions divide by zero
+        if not np.all((y_pred >= 0) & (y_pred <= 1)):
+            y_pred = (y_pred - y_pred.min()) / (y_pred.max() - y_pred.min())
+        if self.less_is_positive:
+            y_pred = 1 - y_pred
+        return y_pred
+
+
+# alias: same object as binarize_auc_scorer, under the regression name
+regression_auc_scorer = binarize_auc_scorer
+
+# alias: same object as binarize_average_precision_scorer
+regression_average_precision_scorer = binarize_average_precision_scorer
diff -r 39ae276e75d9 -r e94395c672bd main_macros.xml
--- a/main_macros.xml Sun Dec 30 01:56:11 2018 -0500
+++ b/main_macros.xml Tue May 14 18:15:12 2019 -0400
@@ -1,14 +1,17 @@
- 1.0
+ 1.0.0.4
python
- scikit-learn
- pandas
+ scikit-learn
+ pandas
xgboost
asteval
-
+ skrebate
+ imbalanced-learn
+ mlxtend
+
@@ -352,10 +355,10 @@
-
+
-
+
@@ -428,7 +431,7 @@
-
+
@@ -823,6 +826,8 @@
+
+
@@ -872,6 +877,16 @@
+
+
+
+
+
+
+
+
+
+
@@ -929,7 +944,13 @@
-
+
@@ -943,6 +964,7 @@
+
@@ -991,7 +1013,7 @@
@@ -1047,13 +1069,47 @@
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1061,7 +1117,7 @@
-
+
@@ -1071,23 +1127,29 @@
-
+
+
-
+
+
+
-
+
+
+
+
@@ -1105,7 +1167,7 @@
-
+
@@ -1139,6 +1201,8 @@
+
+
@@ -1167,6 +1231,8 @@
+
+
@@ -1206,63 +1272,48 @@
+
+
+
+
+
+
+
-
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
@@ -1403,12 +1454,12 @@
-
+
-
-
+
+
@@ -1591,6 +1642,7 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 39ae276e75d9 -r e94395c672bd model_validations.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/model_validations.py Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,252 @@
+"""
+class
+-----
+OrderedKFold
+RepeatedOrderedKFold
+
+
+function
+--------
+train_test_split
+"""
+
+import numpy as np
+import warnings
+
+from itertools import chain
+from math import ceil, floor
+from sklearn.model_selection import (GroupShuffleSplit, ShuffleSplit,
+ StratifiedShuffleSplit)
+from sklearn.model_selection._split import _BaseKFold, _RepeatedSplits
+from sklearn.utils import check_random_state, indexable, safe_indexing
+from sklearn.utils.validation import _num_samples, check_array
+
+
+def _validate_shuffle_split(n_samples, test_size, train_size,
+ default_test_size=None):
+ """
+ Validation helper to check if the train/test sizes are meaningful wrt the
+ size of the data (n_samples)
+ """
+ if test_size is None and train_size is None:
+ test_size = default_test_size
+
+ test_size_type = np.asarray(test_size).dtype.kind
+ train_size_type = np.asarray(train_size).dtype.kind
+
+ if (test_size_type == 'i' and (test_size >= n_samples or test_size <= 0)
+ or test_size_type == 'f' and (test_size <= 0 or test_size >= 1)):
+ raise ValueError('test_size={0} should be either positive and smaller'
+ ' than the number of samples {1} or a float in the '
+ '(0, 1) range'.format(test_size, n_samples))
+
+ if (train_size_type == 'i' and (train_size >= n_samples or train_size <= 0)
+ or train_size_type == 'f' and (train_size <= 0 or train_size >= 1)):
+ raise ValueError('train_size={0} should be either positive and smaller'
+ ' than the number of samples {1} or a float in the '
+ '(0, 1) range'.format(train_size, n_samples))
+
+ if train_size is not None and train_size_type not in ('i', 'f'):
+ raise ValueError("Invalid value for train_size: {}".format(train_size))
+ if test_size is not None and test_size_type not in ('i', 'f'):
+ raise ValueError("Invalid value for test_size: {}".format(test_size))
+
+ if (train_size_type == 'f' and test_size_type == 'f' and
+ train_size + test_size > 1):
+ raise ValueError(
+ 'The sum of test_size and train_size = {}, should be in the (0, 1)'
+ ' range. Reduce test_size and/or train_size.'
+ .format(train_size + test_size))
+
+ if test_size_type == 'f':
+ n_test = ceil(test_size * n_samples)
+ elif test_size_type == 'i':
+ n_test = float(test_size)
+
+ if train_size_type == 'f':
+ n_train = floor(train_size * n_samples)
+ elif train_size_type == 'i':
+ n_train = float(train_size)
+
+ if train_size is None:
+ n_train = n_samples - n_test
+ elif test_size is None:
+ n_test = n_samples - n_train
+
+ if n_train + n_test > n_samples:
+ raise ValueError('The sum of train_size and test_size = %d, '
+ 'should be smaller than the number of '
+ 'samples %d. Reduce test_size and/or '
+ 'train_size.' % (n_train + n_test, n_samples))
+
+ n_train, n_test = int(n_train), int(n_test)
+
+ if n_train == 0:
+ raise ValueError(
+ 'With n_samples={}, test_size={} and train_size={}, the '
+ 'resulting train set will be empty. Adjust any of the '
+ 'aforementioned parameters.'.format(n_samples, test_size,
+ train_size)
+ )
+
+ return n_train, n_test
+
+
+def train_test_split(*arrays, **options):
+ """Extend sklearn.model_selection.train_test_split to support group splits.
+
+ Parameters
+ ----------
+ *arrays : sequence of indexables with same length / shape[0]
+ Allowed inputs are lists, numpy arrays, scipy-sparse
+ matrices or pandas dataframes.
+
+ test_size : float, int or None, optional (default=None)
+ If float, should be between 0.0 and 1.0 and represent the proportion
+ of the dataset to include in the test split. If int, represents the
+ absolute number of test samples. If None, the value is set to the
+ complement of the train size. If ``train_size`` is also None, it will
+ be set to 0.25.
+
+ train_size : float, int, or None, (default=None)
+ If float, should be between 0.0 and 1.0 and represent the
+ proportion of the dataset to include in the train split. If
+ int, represents the absolute number of train samples. If None,
+ the value is automatically set to the complement of the test size.
+
+ random_state : int, RandomState instance or None, optional (default=None)
+ If int, random_state is the seed used by the random number generator;
+ If RandomState instance, random_state is the random number generator;
+ If None, the random number generator is the RandomState instance used
+ by `np.random`.
+
+ shuffle : None or str (default='simple')
+ How to shuffle the data before splitting.
+ None, no shuffle.
+ For str, one of 'simple', 'stratified' and 'group', corresponding to
+ `ShuffleSplit`, `StratifiedShuffleSplit` and `GroupShuffleSplit`,
+ respectively.
+
+ labels : array-like or None (default=None)
+ Ignored if shuffle is None or 'simple'.
+ When shuffle='stratified', this array is used as class labels.
+ When shuffle='group', this array is used as groups.
+
+ Returns
+ -------
+ splitting : list, length=2 * len(arrays)
+ List containing train-test split of inputs.
+
+ """
+ n_arrays = len(arrays)
+ if n_arrays == 0:
+ raise ValueError("At least one array required as input")
+ test_size = options.pop('test_size', None)
+ train_size = options.pop('train_size', None)
+ random_state = options.pop('random_state', None)
+ shuffle = options.pop('shuffle', 'simple')
+ labels = options.pop('labels', None)
+
+ if options:
+ raise TypeError("Invalid parameters passed: %s" % str(options))
+
+ arrays = indexable(*arrays)
+
+ n_samples = _num_samples(arrays[0])
+ if shuffle == 'group':
+ if labels is None:
+ raise ValueError("When shuffle='group', "
+ "labels should not be None!")
+ labels = check_array(labels, ensure_2d=False, dtype=None)
+ uniques = np.unique(labels)
+ n_samples = uniques.size
+
+ n_train, n_test = _validate_shuffle_split(n_samples, test_size, train_size,
+ default_test_size=0.25)
+
+ shuffle_options = dict(test_size=n_test,
+ train_size=n_train,
+ random_state=random_state)
+
+ if shuffle is None:
+ if labels is not None:
+ warnings.warn("The `labels` is ignored for "
+ "shuffle being None!")
+
+ train = np.arange(n_train)
+ test = np.arange(n_train, n_train + n_test)
+
+ elif shuffle == 'simple':
+ if labels is not None:
+ warnings.warn("The `labels` is not needed and therefore "
+ "ignored for ShuffleSplit, as shuffle='simple'!")
+
+ cv = ShuffleSplit(**shuffle_options)
+ train, test = next(cv.split(X=arrays[0], y=None))
+
+ elif shuffle == 'stratified':
+ cv = StratifiedShuffleSplit(**shuffle_options)
+ train, test = next(cv.split(X=arrays[0], y=labels))
+
+ elif shuffle == 'group':
+ cv = GroupShuffleSplit(**shuffle_options)
+ train, test = next(cv.split(X=arrays[0], y=None, groups=labels))
+
+ else:
+ raise ValueError("The argument `shuffle` only supports None, "
+ "'simple', 'stratified' and 'group', but got `%s`!"
+ % shuffle)
+
+ return list(chain.from_iterable((safe_indexing(a, train),
+ safe_indexing(a, test)) for a in arrays))
+
+
+class OrderedKFold(_BaseKFold):
+ """
+ Split into K folds based on the ordered target values
+
+ Parameters
+ ----------
+ n_splits : int, default=3
+ Number of folds. Must be at least 2.
+ shuffle: bool
+ random_state: None or int
+ """
+
+ def __init__(self, n_splits=3, shuffle=False, random_state=None):
+ super(OrderedKFold, self).__init__(n_splits, shuffle, random_state)
+
+ def _iter_test_indices(self, X, y, groups=None):
+ n_samples = _num_samples(X)
+ n_splits = self.n_splits
+ y = np.asarray(y)
+ sorted_index = np.argsort(y)
+ if self.shuffle:
+ current = 0
+ rng = check_random_state(self.random_state)
+ for i in range(n_samples // int(n_splits)):
+ start, stop = current, current + n_splits
+ rng.shuffle(sorted_index[start:stop])
+ current = stop
+ rng.shuffle(sorted_index[current:])
+
+ for i in range(n_splits):
+ yield sorted_index[i:n_samples:n_splits]
+
+
+class RepeatedOrderedKFold(_RepeatedSplits):
+ """ Repeated OrderedKFold runs multiple times with different randomization.
+
+ Parameters
+ ----------
+ n_splits : int, default=5
+ Number of folds. Must be at least 2.
+
+ n_repeats : int, default=5
+ Number of times the cross-validator is to be repeated.
+
+ random_state: int, RandomState instance or None. Optional
+ """
+ def __init__(self, n_splits=5, n_repeats=5, random_state=None):
+ super(RepeatedOrderedKFold, self).__init__(
+ OrderedKFold, n_repeats, random_state, n_splits=n_splits)
diff -r 39ae276e75d9 -r e94395c672bd pk_whitelist.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pk_whitelist.json Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,768 @@
+{ "SK_NAMES": [
+ "sklearn._ASSUME_FINITE", "sklearn._isotonic._inplace_contiguous_isotonic_regression",
+ "sklearn._isotonic._make_unique", "sklearn.base.BaseEstimator",
+ "sklearn.base.BiclusterMixin", "sklearn.base.ClassifierMixin",
+ "sklearn.base.ClusterMixin", "sklearn.base.DensityMixin",
+ "sklearn.base.MetaEstimatorMixin", "sklearn.base.RegressorMixin",
+ "sklearn.base.TransformerMixin", "sklearn.base._first_and_last_element",
+ "sklearn.base._pprint", "sklearn.base.clone",
+ "sklearn.base.is_classifier", "sklearn.base.is_regressor",
+ "sklearn.clone", "sklearn.cluster.AffinityPropagation",
+ "sklearn.cluster.AgglomerativeClustering", "sklearn.cluster.Birch",
+ "sklearn.cluster.DBSCAN", "sklearn.cluster.FeatureAgglomeration",
+ "sklearn.cluster.KMeans", "sklearn.cluster.MeanShift",
+ "sklearn.cluster.MiniBatchKMeans", "sklearn.cluster.SpectralBiclustering",
+ "sklearn.cluster.SpectralClustering", "sklearn.cluster.SpectralCoclustering",
+ "sklearn.cluster._dbscan_inner.dbscan_inner", "sklearn.cluster._feature_agglomeration.AgglomerationTransform",
+ "sklearn.cluster._hierarchical.WeightedEdge", "sklearn.cluster._hierarchical._get_parents",
+ "sklearn.cluster._hierarchical._hc_get_descendent", "sklearn.cluster._hierarchical.average_merge",
+ "sklearn.cluster._hierarchical.compute_ward_dist", "sklearn.cluster._hierarchical.hc_get_heads",
+ "sklearn.cluster._hierarchical.max_merge", "sklearn.cluster._k_means._assign_labels_array",
+ "sklearn.cluster._k_means._assign_labels_csr", "sklearn.cluster._k_means._centers_dense",
+ "sklearn.cluster._k_means._centers_sparse", "sklearn.cluster._k_means._mini_batch_update_csr",
+ "sklearn.cluster._k_means_elkan.k_means_elkan", "sklearn.cluster.affinity_propagation",
+ "sklearn.cluster.affinity_propagation_.AffinityPropagation", "sklearn.cluster.affinity_propagation_.affinity_propagation",
+ "sklearn.cluster.bicluster.BaseSpectral", "sklearn.cluster.bicluster.SpectralBiclustering",
+ "sklearn.cluster.bicluster.SpectralCoclustering", "sklearn.cluster.bicluster._bistochastic_normalize",
+ "sklearn.cluster.bicluster._log_normalize", "sklearn.cluster.bicluster._scale_normalize",
+ "sklearn.cluster.birch.Birch", "sklearn.cluster.birch._CFNode",
+ "sklearn.cluster.birch._CFSubcluster", "sklearn.cluster.birch._iterate_sparse_X",
+ "sklearn.cluster.birch._split_node", "sklearn.cluster.dbscan",
+ "sklearn.cluster.dbscan_.DBSCAN", "sklearn.cluster.dbscan_.dbscan",
+ "sklearn.cluster.estimate_bandwidth", "sklearn.cluster.get_bin_seeds",
+ "sklearn.cluster.hierarchical.AgglomerativeClustering", "sklearn.cluster.hierarchical.FeatureAgglomeration",
+ "sklearn.cluster.hierarchical._TREE_BUILDERS", "sklearn.cluster.hierarchical._average_linkage",
+ "sklearn.cluster.hierarchical._complete_linkage", "sklearn.cluster.hierarchical._fix_connectivity",
+ "sklearn.cluster.hierarchical._hc_cut", "sklearn.cluster.hierarchical.linkage_tree",
+ "sklearn.cluster.hierarchical.ward_tree", "sklearn.cluster.k_means",
+ "sklearn.cluster.k_means_.FLOAT_DTYPES", "sklearn.cluster.k_means_.KMeans",
+ "sklearn.cluster.k_means_.MiniBatchKMeans", "sklearn.cluster.k_means_._init_centroids",
+ "sklearn.cluster.k_means_._k_init", "sklearn.cluster.k_means_._kmeans_single_elkan",
+ "sklearn.cluster.k_means_._kmeans_single_lloyd", "sklearn.cluster.k_means_._labels_inertia",
+ "sklearn.cluster.k_means_._labels_inertia_precompute_dense", "sklearn.cluster.k_means_._mini_batch_convergence",
+ "sklearn.cluster.k_means_._mini_batch_step", "sklearn.cluster.k_means_._tolerance",
+ "sklearn.cluster.k_means_._validate_center_shape", "sklearn.cluster.k_means_.k_means",
+ "sklearn.cluster.k_means_.string_types", "sklearn.cluster.linkage_tree",
+ "sklearn.cluster.mean_shift", "sklearn.cluster.mean_shift_.MeanShift",
+ "sklearn.cluster.mean_shift_._mean_shift_single_seed", "sklearn.cluster.mean_shift_.estimate_bandwidth",
+ "sklearn.cluster.mean_shift_.get_bin_seeds", "sklearn.cluster.mean_shift_.mean_shift",
+ "sklearn.cluster.spectral.SpectralClustering", "sklearn.cluster.spectral.discretize",
+ "sklearn.cluster.spectral.spectral_clustering", "sklearn.cluster.spectral_clustering",
+ "sklearn.cluster.ward_tree", "sklearn.config_context", "sklearn.compose.TransformedTargetRegressor",
+ "sklearn.compose._target.TransformedTargetRegressor", "sklearn.compose.ColumnTransformer",
+ "sklearn.compose._column_transformer.ColumnTransformer", "sklearn.compose.make_column_transformer",
+ "sklearn.compose._column_transformer.make_column_transformer",
+ "sklearn.covariance.EllipticEnvelope", "sklearn.covariance.EmpiricalCovariance",
+ "sklearn.covariance.GraphLasso", "sklearn.covariance.GraphLassoCV",
+ "sklearn.covariance.LedoitWolf", "sklearn.covariance.MinCovDet",
+ "sklearn.covariance.OAS", "sklearn.covariance.ShrunkCovariance",
+ "sklearn.covariance.empirical_covariance", "sklearn.covariance.empirical_covariance_.EmpiricalCovariance",
+ "sklearn.covariance.empirical_covariance_.empirical_covariance", "sklearn.covariance.empirical_covariance_.log_likelihood",
+ "sklearn.covariance.fast_mcd", "sklearn.covariance.graph_lasso",
+ "sklearn.covariance.graph_lasso_.GraphLasso", "sklearn.covariance.graph_lasso_.GraphLassoCV",
+ "sklearn.covariance.graph_lasso_._dual_gap", "sklearn.covariance.graph_lasso_._objective",
+ "sklearn.covariance.graph_lasso_.alpha_max", "sklearn.covariance.graph_lasso_.graph_lasso",
+ "sklearn.covariance.graph_lasso_.graph_lasso_path", "sklearn.covariance.ledoit_wolf",
+ "sklearn.covariance.ledoit_wolf_shrinkage", "sklearn.covariance.log_likelihood",
+ "sklearn.covariance.oas", "sklearn.covariance.outlier_detection.EllipticEnvelope",
+ "sklearn.covariance.robust_covariance.MinCovDet", "sklearn.covariance.robust_covariance._c_step",
+ "sklearn.covariance.robust_covariance.c_step", "sklearn.covariance.robust_covariance.fast_mcd",
+ "sklearn.covariance.robust_covariance.select_candidates", "sklearn.covariance.shrunk_covariance",
+ "sklearn.covariance.shrunk_covariance_.LedoitWolf", "sklearn.covariance.shrunk_covariance_.OAS",
+ "sklearn.covariance.shrunk_covariance_.ShrunkCovariance", "sklearn.covariance.shrunk_covariance_.ledoit_wolf",
+ "sklearn.covariance.shrunk_covariance_.ledoit_wolf_shrinkage", "sklearn.covariance.shrunk_covariance_.oas",
+ "sklearn.covariance.shrunk_covariance_.shrunk_covariance", "sklearn.decomposition.DictionaryLearning",
+ "sklearn.decomposition.FactorAnalysis", "sklearn.decomposition.FastICA",
+ "sklearn.decomposition.IncrementalPCA", "sklearn.decomposition.KernelPCA",
+ "sklearn.decomposition.LatentDirichletAllocation", "sklearn.decomposition.MiniBatchDictionaryLearning",
+ "sklearn.decomposition.MiniBatchSparsePCA", "sklearn.decomposition.NMF",
+ "sklearn.decomposition.PCA", "sklearn.decomposition.RandomizedPCA",
+ "sklearn.decomposition.SparseCoder", "sklearn.decomposition.SparsePCA",
+ "sklearn.decomposition.TruncatedSVD", "sklearn.decomposition._online_lda._dirichlet_expectation_1d",
+ "sklearn.decomposition._online_lda._dirichlet_expectation_2d", "sklearn.decomposition._online_lda.mean_change",
+ "sklearn.decomposition.base._BasePCA", "sklearn.decomposition.cdnmf_fast._update_cdnmf_fast",
+ "sklearn.decomposition.dict_learning", "sklearn.decomposition.dict_learning_online",
+ "sklearn.decomposition.factor_analysis.FactorAnalysis", "sklearn.decomposition.fastica",
+ "sklearn.decomposition.fastica_.FLOAT_DTYPES", "sklearn.decomposition.fastica_.FastICA",
+ "sklearn.decomposition.fastica_._cube", "sklearn.decomposition.fastica_._exp",
+ "sklearn.decomposition.fastica_._gs_decorrelation", "sklearn.decomposition.fastica_._ica_def",
+ "sklearn.decomposition.fastica_._ica_par", "sklearn.decomposition.fastica_._logcosh",
+ "sklearn.decomposition.fastica_._sym_decorrelation", "sklearn.decomposition.fastica_.fastica",
+ "sklearn.decomposition.fastica_.string_types", "sklearn.decomposition.incremental_pca.IncrementalPCA",
+ "sklearn.decomposition.kernel_pca.KernelPCA", "sklearn.decomposition.nmf.EPSILON",
+ "sklearn.decomposition.nmf.INTEGER_TYPES", "sklearn.decomposition.nmf.NMF",
+ "sklearn.decomposition.nmf._beta_divergence", "sklearn.decomposition.nmf._beta_loss_to_float",
+ "sklearn.decomposition.nmf._check_init", "sklearn.decomposition.nmf._check_string_param",
+ "sklearn.decomposition.nmf._compute_regularization", "sklearn.decomposition.nmf._fit_coordinate_descent",
+ "sklearn.decomposition.nmf._fit_multiplicative_update", "sklearn.decomposition.nmf._initialize_nmf",
+ "sklearn.decomposition.nmf._multiplicative_update_h", "sklearn.decomposition.nmf._multiplicative_update_w",
+ "sklearn.decomposition.nmf._special_sparse_dot", "sklearn.decomposition.nmf._update_coordinate_descent",
+ "sklearn.decomposition.nmf.non_negative_factorization", "sklearn.decomposition.nmf.norm",
+ "sklearn.decomposition.nmf.trace_dot", "sklearn.decomposition.non_negative_factorization",
+ "sklearn.decomposition.online_lda.EPS", "sklearn.decomposition.online_lda.LatentDirichletAllocation",
+ "sklearn.decomposition.online_lda._update_doc_distribution", "sklearn.decomposition.online_lda.gammaln",
+ "sklearn.decomposition.pca.PCA", "sklearn.decomposition.pca.RandomizedPCA",
+ "sklearn.decomposition.pca._assess_dimension_", "sklearn.decomposition.pca._infer_dimension_",
+ "sklearn.decomposition.pca.gammaln", "sklearn.decomposition.sparse_encode",
+ "sklearn.decomposition.sparse_pca.MiniBatchSparsePCA", "sklearn.decomposition.sparse_pca.SparsePCA",
+ "sklearn.decomposition.truncated_svd.TruncatedSVD", "sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
+ "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis", "sklearn.discriminant_analysis._class_cov",
+ "sklearn.discriminant_analysis._class_means", "sklearn.discriminant_analysis._cov",
+ "sklearn.discriminant_analysis.string_types", "sklearn.ensemble.AdaBoostClassifier",
+ "sklearn.ensemble.AdaBoostRegressor", "sklearn.ensemble.BaggingClassifier",
+ "sklearn.ensemble.BaggingRegressor", "sklearn.ensemble.BaseEnsemble",
+ "sklearn.ensemble.ExtraTreesClassifier", "sklearn.ensemble.ExtraTreesRegressor",
+ "sklearn.ensemble.GradientBoostingClassifier", "sklearn.ensemble.GradientBoostingRegressor",
+ "sklearn.ensemble.IsolationForest", "sklearn.ensemble.RandomForestClassifier",
+ "sklearn.ensemble.RandomForestRegressor", "sklearn.ensemble.RandomTreesEmbedding",
+ "sklearn.ensemble.VotingClassifier", "sklearn.ensemble._gradient_boosting._partial_dependence_tree",
+ "sklearn.ensemble._gradient_boosting._predict_regression_tree_stages_sparse", "sklearn.ensemble._gradient_boosting._random_sample_mask",
+ "sklearn.ensemble._gradient_boosting.predict_stage", "sklearn.ensemble._gradient_boosting.predict_stages",
+ "sklearn.ensemble.bagging.BaggingClassifier", "sklearn.ensemble.bagging.BaggingRegressor",
+ "sklearn.ensemble.bagging.BaseBagging", "sklearn.ensemble.bagging.MAX_INT",
+ "sklearn.ensemble.bagging._generate_bagging_indices", "sklearn.ensemble.bagging._generate_indices",
+ "sklearn.ensemble.bagging._parallel_build_estimators", "sklearn.ensemble.bagging._parallel_decision_function",
+ "sklearn.ensemble.bagging._parallel_predict_log_proba", "sklearn.ensemble.bagging._parallel_predict_proba",
+ "sklearn.ensemble.bagging._parallel_predict_regression", "sklearn.ensemble.base.BaseEnsemble",
+ "sklearn.ensemble.base.MAX_RAND_SEED", "sklearn.ensemble.base._partition_estimators",
+ "sklearn.ensemble.base._set_random_states", "sklearn.ensemble.forest.BaseForest",
+ "sklearn.ensemble.forest.ExtraTreesClassifier", "sklearn.ensemble.forest.ExtraTreesRegressor",
+ "sklearn.ensemble.forest.ForestClassifier", "sklearn.ensemble.forest.ForestRegressor",
+ "sklearn.ensemble.forest.MAX_INT", "sklearn.ensemble.forest.RandomForestClassifier",
+ "sklearn.ensemble.forest.RandomForestRegressor", "sklearn.ensemble.forest.RandomTreesEmbedding",
+ "sklearn.ensemble.forest._generate_sample_indices", "sklearn.ensemble.forest._generate_unsampled_indices",
+ "sklearn.ensemble.forest._parallel_build_trees", "sklearn.ensemble.forest.accumulate_prediction",
+ "sklearn.ensemble.gradient_boosting.BaseGradientBoosting", "sklearn.ensemble.gradient_boosting.BinomialDeviance",
+ "sklearn.ensemble.gradient_boosting.ClassificationLossFunction", "sklearn.ensemble.gradient_boosting.ExponentialLoss",
+ "sklearn.ensemble.gradient_boosting.GradientBoostingClassifier", "sklearn.ensemble.gradient_boosting.GradientBoostingRegressor",
+ "sklearn.ensemble.gradient_boosting.HuberLossFunction", "sklearn.ensemble.gradient_boosting.INIT_ESTIMATORS",
+ "sklearn.ensemble.gradient_boosting.LOSS_FUNCTIONS", "sklearn.ensemble.gradient_boosting.LeastAbsoluteError",
+ "sklearn.ensemble.gradient_boosting.LeastSquaresError", "sklearn.ensemble.gradient_boosting.LogOddsEstimator",
+ "sklearn.ensemble.gradient_boosting.LossFunction", "sklearn.ensemble.gradient_boosting.MeanEstimator",
+ "sklearn.ensemble.gradient_boosting.MultinomialDeviance", "sklearn.ensemble.gradient_boosting.PriorProbabilityEstimator",
+ "sklearn.ensemble.gradient_boosting.QuantileEstimator", "sklearn.ensemble.gradient_boosting.QuantileLossFunction",
+ "sklearn.ensemble.gradient_boosting.RegressionLossFunction", "sklearn.ensemble.gradient_boosting.ScaledLogOddsEstimator",
+ "sklearn.ensemble.gradient_boosting.TREE_LEAF", "sklearn.ensemble.gradient_boosting.VerboseReporter",
+ "sklearn.ensemble.gradient_boosting.ZeroEstimator", "sklearn.ensemble.gradient_boosting.expit",
+ "sklearn.ensemble.iforest.INTEGER_TYPES", "sklearn.ensemble.iforest.IsolationForest",
+ "sklearn.ensemble.iforest._average_path_length", "sklearn.ensemble.iforest.euler_gamma",
+ "sklearn.ensemble.partial_dependence._grid_from_X", "sklearn.ensemble.partial_dependence.partial_dependence",
+ "sklearn.ensemble.partial_dependence.plot_partial_dependence", "sklearn.ensemble.voting_classifier.VotingClassifier",
+ "sklearn.ensemble.voting_classifier._parallel_fit_estimator", "sklearn.ensemble.weight_boosting.AdaBoostClassifier",
+ "sklearn.ensemble.weight_boosting.AdaBoostRegressor", "sklearn.ensemble.weight_boosting.BaseWeightBoosting",
+ "sklearn.ensemble.weight_boosting._samme_proba", "sklearn.ensemble.weight_boosting.inner1d",
+ "sklearn.feature_extraction.DictVectorizer", "sklearn.feature_extraction.FeatureHasher",
+ "sklearn.feature_extraction._hashing.transform", "sklearn.feature_extraction.dict_vectorizer.DictVectorizer",
+ "sklearn.feature_extraction.dict_vectorizer._tosequence", "sklearn.feature_extraction.grid_to_graph",
+ "sklearn.feature_extraction.hashing.FeatureHasher", "sklearn.feature_extraction.hashing._iteritems",
+ "sklearn.feature_extraction.image.PatchExtractor", "sklearn.feature_extraction.image._compute_gradient_3d",
+ "sklearn.feature_extraction.image._compute_n_patches", "sklearn.feature_extraction.image._make_edges_3d",
+ "sklearn.feature_extraction.image._mask_edges_weights", "sklearn.feature_extraction.image._to_graph",
+ "sklearn.feature_extraction.image.extract_patches", "sklearn.feature_extraction.image.extract_patches_2d",
+ "sklearn.feature_extraction.image.grid_to_graph", "sklearn.feature_extraction.image.img_to_graph",
+ "sklearn.feature_extraction.image.reconstruct_from_patches_2d", "sklearn.feature_extraction.img_to_graph",
+ "sklearn.feature_extraction.stop_words.ENGLISH_STOP_WORDS", "sklearn.feature_extraction.text.CountVectorizer",
+ "sklearn.feature_extraction.text.ENGLISH_STOP_WORDS", "sklearn.feature_extraction.text.HashingVectorizer",
+ "sklearn.feature_extraction.text.TfidfTransformer", "sklearn.feature_extraction.text.TfidfVectorizer",
+ "sklearn.feature_extraction.text.VectorizerMixin", "sklearn.feature_extraction.text._check_stop_list",
+ "sklearn.feature_extraction.text._document_frequency", "sklearn.feature_extraction.text._make_int_array",
+ "sklearn.feature_extraction.text.strip_accents_ascii", "sklearn.feature_extraction.text.strip_accents_unicode",
+ "sklearn.feature_extraction.text.strip_tags", "sklearn.feature_selection.GenericUnivariateSelect",
+ "sklearn.feature_selection.RFE", "sklearn.feature_selection.RFECV",
+ "sklearn.feature_selection.SelectFdr", "sklearn.feature_selection.SelectFpr",
+ "sklearn.feature_selection.SelectFromModel", "sklearn.feature_selection.SelectFwe",
+ "sklearn.feature_selection.SelectKBest", "sklearn.feature_selection.SelectPercentile",
+ "sklearn.feature_selection.VarianceThreshold", "sklearn.feature_selection.base.SelectorMixin",
+ "sklearn.feature_selection.chi2", "sklearn.feature_selection.f_classif",
+ "sklearn.feature_selection.f_oneway", "sklearn.feature_selection.f_regression",
+ "sklearn.feature_selection.from_model.SelectFromModel", "sklearn.feature_selection.from_model._calculate_threshold",
+ "sklearn.feature_selection.from_model._get_feature_importances", "sklearn.feature_selection.mutual_info_._compute_mi",
+ "sklearn.feature_selection.mutual_info_._compute_mi_cc", "sklearn.feature_selection.mutual_info_._compute_mi_cd",
+ "sklearn.feature_selection.mutual_info_._estimate_mi", "sklearn.feature_selection.mutual_info_._iterate_columns",
+ "sklearn.feature_selection.mutual_info_.digamma", "sklearn.feature_selection.mutual_info_.mutual_info_classif",
+ "sklearn.feature_selection.mutual_info_.mutual_info_regression", "sklearn.feature_selection.mutual_info_classif",
+ "sklearn.feature_selection.mutual_info_regression", "sklearn.feature_selection.rfe.RFE",
+ "sklearn.feature_selection.rfe.RFECV", "sklearn.feature_selection.rfe._rfe_single_fit",
+ "sklearn.feature_selection.univariate_selection.GenericUnivariateSelect", "sklearn.feature_selection.univariate_selection.SelectFdr",
+ "sklearn.feature_selection.univariate_selection.SelectFpr", "sklearn.feature_selection.univariate_selection.SelectFwe",
+ "sklearn.feature_selection.univariate_selection.SelectKBest", "sklearn.feature_selection.univariate_selection.SelectPercentile",
+ "sklearn.feature_selection.univariate_selection._BaseFilter", "sklearn.feature_selection.univariate_selection._chisquare",
+ "sklearn.feature_selection.univariate_selection._clean_nans", "sklearn.feature_selection.univariate_selection.chi2",
+ "sklearn.feature_selection.univariate_selection.f_classif", "sklearn.feature_selection.univariate_selection.f_oneway",
+ "sklearn.feature_selection.univariate_selection.f_regression", "sklearn.feature_selection.variance_threshold.VarianceThreshold",
+ "sklearn.gaussian_process.GaussianProcess", "sklearn.gaussian_process.GaussianProcessClassifier",
+ "sklearn.gaussian_process.GaussianProcessRegressor", "sklearn.gaussian_process.correlation_models.absolute_exponential",
+ "sklearn.gaussian_process.correlation_models.cubic", "sklearn.gaussian_process.correlation_models.generalized_exponential",
+ "sklearn.gaussian_process.correlation_models.linear", "sklearn.gaussian_process.correlation_models.pure_nugget",
+ "sklearn.gaussian_process.correlation_models.squared_exponential", "sklearn.gaussian_process.gaussian_process.GaussianProcess",
+ "sklearn.gaussian_process.gaussian_process.MACHINE_EPSILON", "sklearn.gaussian_process.gaussian_process.l1_cross_distances",
+ "sklearn.gaussian_process.gpc.COEFS", "sklearn.gaussian_process.gpc.GaussianProcessClassifier",
+ "sklearn.gaussian_process.gpc.LAMBDAS", "sklearn.gaussian_process.gpc._BinaryGaussianProcessClassifierLaplace",
+ "sklearn.gaussian_process.gpc.erf", "sklearn.gaussian_process.gpc.expit",
+ "sklearn.gaussian_process.gpr.GaussianProcessRegressor", "sklearn.gaussian_process.kernels.CompoundKernel",
+ "sklearn.gaussian_process.kernels.ConstantKernel", "sklearn.gaussian_process.kernels.DotProduct",
+ "sklearn.gaussian_process.kernels.ExpSineSquared", "sklearn.gaussian_process.kernels.Exponentiation",
+ "sklearn.gaussian_process.kernels.Hyperparameter", "sklearn.gaussian_process.kernels.Kernel",
+ "sklearn.gaussian_process.kernels.KernelOperator", "sklearn.gaussian_process.kernels.Matern",
+ "sklearn.gaussian_process.kernels.NormalizedKernelMixin", "sklearn.gaussian_process.kernels.PairwiseKernel",
+ "sklearn.gaussian_process.kernels.Product", "sklearn.gaussian_process.kernels.RBF",
+ "sklearn.gaussian_process.kernels.RationalQuadratic", "sklearn.gaussian_process.kernels.StationaryKernelMixin",
+ "sklearn.gaussian_process.kernels.Sum", "sklearn.gaussian_process.kernels.WhiteKernel",
+ "sklearn.gaussian_process.kernels._approx_fprime", "sklearn.gaussian_process.kernels._check_length_scale",
+ "sklearn.gaussian_process.kernels.gamma", "sklearn.gaussian_process.kernels.kv",
+ "sklearn.gaussian_process.regression_models.constant", "sklearn.gaussian_process.regression_models.linear",
+ "sklearn.gaussian_process.regression_models.quadratic", "sklearn.get_config",
+ "sklearn.isotonic.IsotonicRegression", "sklearn.isotonic.check_increasing",
+ "sklearn.isotonic.isotonic_regression", "sklearn.kernel_approximation.AdditiveChi2Sampler",
+ "sklearn.kernel_approximation.KERNEL_PARAMS", "sklearn.kernel_approximation.Nystroem",
+ "sklearn.kernel_approximation.RBFSampler", "sklearn.kernel_approximation.SkewedChi2Sampler",
+ "sklearn.kernel_ridge.KernelRidge", "sklearn.linear_model.ARDRegression",
+ "sklearn.linear_model.BayesianRidge", "sklearn.linear_model.ElasticNet",
+ "sklearn.linear_model.ElasticNetCV", "sklearn.linear_model.Hinge",
+ "sklearn.linear_model.Huber", "sklearn.linear_model.HuberRegressor",
+ "sklearn.linear_model.Lars", "sklearn.linear_model.LarsCV",
+ "sklearn.linear_model.Lasso", "sklearn.linear_model.LassoCV",
+ "sklearn.linear_model.LassoLars", "sklearn.linear_model.LassoLarsCV",
+ "sklearn.linear_model.LassoLarsIC", "sklearn.linear_model.LinearRegression",
+ "sklearn.linear_model.Log", "sklearn.linear_model.LogisticRegression",
+ "sklearn.linear_model.LogisticRegressionCV", "sklearn.linear_model.ModifiedHuber",
+ "sklearn.linear_model.MultiTaskElasticNet", "sklearn.linear_model.MultiTaskElasticNetCV",
+ "sklearn.linear_model.MultiTaskLasso", "sklearn.linear_model.MultiTaskLassoCV",
+ "sklearn.linear_model.OrthogonalMatchingPursuit", "sklearn.linear_model.OrthogonalMatchingPursuitCV",
+ "sklearn.linear_model.PassiveAggressiveClassifier", "sklearn.linear_model.PassiveAggressiveRegressor",
+ "sklearn.linear_model.Perceptron", "sklearn.linear_model.RANSACRegressor",
+ "sklearn.linear_model.RandomizedLasso", "sklearn.linear_model.RandomizedLogisticRegression",
+ "sklearn.linear_model.Ridge", "sklearn.linear_model.RidgeCV",
+ "sklearn.linear_model.RidgeClassifier", "sklearn.linear_model.RidgeClassifierCV",
+ "sklearn.linear_model.SGDClassifier", "sklearn.linear_model.SGDRegressor",
+ "sklearn.linear_model.SquaredLoss", "sklearn.linear_model.TheilSenRegressor",
+ "sklearn.linear_model.base.FLOAT_DTYPES", "sklearn.linear_model.base.LinearClassifierMixin",
+ "sklearn.linear_model.base.LinearModel", "sklearn.linear_model.base.LinearRegression",
+ "sklearn.linear_model.base.SPARSE_INTERCEPT_DECAY", "sklearn.linear_model.base.SparseCoefMixin",
+ "sklearn.linear_model.base._pre_fit", "sklearn.linear_model.base._preprocess_data",
+ "sklearn.linear_model.base._rescale_data", "sklearn.linear_model.base.center_data",
+ "sklearn.linear_model.base.make_dataset", "sklearn.linear_model.base.sparse_center_data",
+ "sklearn.linear_model.bayes.ARDRegression", "sklearn.linear_model.bayes.BayesianRidge",
+ "sklearn.linear_model.cd_fast.enet_coordinate_descent", "sklearn.linear_model.cd_fast.enet_coordinate_descent_gram",
+ "sklearn.linear_model.cd_fast.enet_coordinate_descent_multi_task", "sklearn.linear_model.cd_fast.sparse_enet_coordinate_descent",
+ "sklearn.linear_model.coordinate_descent.ElasticNet", "sklearn.linear_model.coordinate_descent.ElasticNetCV",
+ "sklearn.linear_model.coordinate_descent.Lasso", "sklearn.linear_model.coordinate_descent.LassoCV",
+ "sklearn.linear_model.coordinate_descent.LinearModelCV", "sklearn.linear_model.coordinate_descent.MultiTaskElasticNet",
+ "sklearn.linear_model.coordinate_descent.MultiTaskElasticNetCV", "sklearn.linear_model.coordinate_descent.MultiTaskLasso",
+ "sklearn.linear_model.coordinate_descent.MultiTaskLassoCV", "sklearn.linear_model.coordinate_descent._alpha_grid",
+ "sklearn.linear_model.coordinate_descent._path_residuals", "sklearn.linear_model.coordinate_descent.enet_path",
+ "sklearn.linear_model.coordinate_descent.lasso_path", "sklearn.linear_model.enet_path",
+ "sklearn.linear_model.huber.HuberRegressor", "sklearn.linear_model.huber._huber_loss_and_gradient",
+ "sklearn.linear_model.lars_path", "sklearn.linear_model.lasso_path",
+ "sklearn.linear_model.lasso_stability_path", "sklearn.linear_model.least_angle.Lars",
+ "sklearn.linear_model.least_angle.LarsCV", "sklearn.linear_model.least_angle.LassoLars",
+ "sklearn.linear_model.least_angle.LassoLarsCV", "sklearn.linear_model.least_angle.LassoLarsIC",
+ "sklearn.linear_model.least_angle._check_copy_and_writeable", "sklearn.linear_model.least_angle._lars_path_residues",
+ "sklearn.linear_model.least_angle.lars_path", "sklearn.linear_model.least_angle.solve_triangular_args",
+ "sklearn.linear_model.least_angle.string_types", "sklearn.linear_model.logistic.LogisticRegression",
+ "sklearn.linear_model.logistic.LogisticRegressionCV", "sklearn.linear_model.logistic.SCORERS",
+ "sklearn.linear_model.logistic._check_solver_option", "sklearn.linear_model.logistic._intercept_dot",
+ "sklearn.linear_model.logistic._log_reg_scoring_path", "sklearn.linear_model.logistic._logistic_grad_hess",
+ "sklearn.linear_model.logistic._logistic_loss", "sklearn.linear_model.logistic._logistic_loss_and_grad",
+ "sklearn.linear_model.logistic._multinomial_grad_hess", "sklearn.linear_model.logistic._multinomial_loss",
+ "sklearn.linear_model.logistic._multinomial_loss_grad", "sklearn.linear_model.logistic.expit",
+ "sklearn.linear_model.logistic.logistic_regression_path", "sklearn.linear_model.logistic_regression_path",
+ "sklearn.linear_model.omp.OrthogonalMatchingPursuit", "sklearn.linear_model.omp.OrthogonalMatchingPursuitCV",
+ "sklearn.linear_model.omp._cholesky_omp", "sklearn.linear_model.omp._gram_omp",
+ "sklearn.linear_model.omp._omp_path_residues", "sklearn.linear_model.omp.orthogonal_mp",
+ "sklearn.linear_model.omp.orthogonal_mp_gram", "sklearn.linear_model.omp.premature",
+ "sklearn.linear_model.omp.solve_triangular_args", "sklearn.linear_model.orthogonal_mp",
+ "sklearn.linear_model.orthogonal_mp_gram", "sklearn.linear_model.passive_aggressive.DEFAULT_EPSILON",
+ "sklearn.linear_model.passive_aggressive.PassiveAggressiveClassifier", "sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor",
+ "sklearn.linear_model.perceptron.Perceptron", "sklearn.linear_model.randomized_l1.BaseRandomizedLinearModel",
+ "sklearn.linear_model.randomized_l1.RandomizedLasso", "sklearn.linear_model.randomized_l1.RandomizedLogisticRegression",
+ "sklearn.linear_model.randomized_l1._lasso_stability_path", "sklearn.linear_model.randomized_l1._randomized_lasso",
+ "sklearn.linear_model.randomized_l1._randomized_logistic", "sklearn.linear_model.randomized_l1._resample_model",
+ "sklearn.linear_model.randomized_l1.lasso_stability_path", "sklearn.linear_model.ransac.RANSACRegressor",
+ "sklearn.linear_model.ransac._EPSILON", "sklearn.linear_model.ransac._dynamic_max_trials",
+ "sklearn.linear_model.ridge.Ridge", "sklearn.linear_model.ridge.RidgeCV",
+ "sklearn.linear_model.ridge.RidgeClassifier", "sklearn.linear_model.ridge.RidgeClassifierCV",
+ "sklearn.linear_model.ridge._BaseRidge", "sklearn.linear_model.ridge._BaseRidgeCV",
+ "sklearn.linear_model.ridge._RidgeGCV", "sklearn.linear_model.ridge._solve_cholesky",
+ "sklearn.linear_model.ridge._solve_cholesky_kernel", "sklearn.linear_model.ridge._solve_lsqr",
+ "sklearn.linear_model.ridge._solve_sparse_cg", "sklearn.linear_model.ridge._solve_svd",
+ "sklearn.linear_model.ridge.ridge_regression", "sklearn.linear_model.ridge_regression",
+ "sklearn.linear_model.sag.get_auto_step_size", "sklearn.linear_model.sag.sag",
+ "sklearn.linear_model.sag.sag_solver", "sklearn.linear_model.sag_fast.MultinomialLogLoss",
+ "sklearn.linear_model.sag_fast._multinomial_grad_loss_all_samples", "sklearn.linear_model.sag_fast.sag",
+ "sklearn.linear_model.sgd_fast.Classification", "sklearn.linear_model.sgd_fast.EpsilonInsensitive",
+ "sklearn.linear_model.sgd_fast.Hinge", "sklearn.linear_model.sgd_fast.Huber",
+ "sklearn.linear_model.sgd_fast.Log", "sklearn.linear_model.sgd_fast.LossFunction",
+ "sklearn.linear_model.sgd_fast.ModifiedHuber", "sklearn.linear_model.sgd_fast.Regression",
+ "sklearn.linear_model.sgd_fast.SquaredEpsilonInsensitive", "sklearn.linear_model.sgd_fast.SquaredHinge",
+ "sklearn.linear_model.sgd_fast.SquaredLoss", "sklearn.linear_model.sgd_fast._plain_sgd",
+ "sklearn.linear_model.sgd_fast.average_sgd", "sklearn.linear_model.sgd_fast.plain_sgd",
+ "sklearn.linear_model.stochastic_gradient.BaseSGD", "sklearn.linear_model.stochastic_gradient.BaseSGDClassifier",
+ "sklearn.linear_model.stochastic_gradient.BaseSGDRegressor", "sklearn.linear_model.stochastic_gradient.DEFAULT_EPSILON",
+ "sklearn.linear_model.stochastic_gradient.LEARNING_RATE_TYPES", "sklearn.linear_model.stochastic_gradient.PENALTY_TYPES",
+ "sklearn.linear_model.stochastic_gradient.SGDClassifier", "sklearn.linear_model.stochastic_gradient.SGDRegressor",
+ "sklearn.linear_model.stochastic_gradient._prepare_fit_binary", "sklearn.linear_model.stochastic_gradient.fit_binary",
+ "sklearn.linear_model.theil_sen.TheilSenRegressor", "sklearn.linear_model.theil_sen._EPSILON",
+ "sklearn.linear_model.theil_sen._breakdown_point", "sklearn.linear_model.theil_sen._lstsq",
+ "sklearn.linear_model.theil_sen._modified_weiszfeld_step", "sklearn.linear_model.theil_sen._spatial_median",
+ "sklearn.linear_model.theil_sen.binom", "sklearn.manifold.Isomap",
+ "sklearn.manifold.LocallyLinearEmbedding", "sklearn.manifold.MDS",
+ "sklearn.manifold.SpectralEmbedding", "sklearn.manifold.TSNE",
+ "sklearn.manifold._barnes_hut_tsne.gradient", "sklearn.manifold._utils._binary_search_perplexity",
+ "sklearn.manifold.isomap.Isomap", "sklearn.manifold.locally_linear.FLOAT_DTYPES",
+ "sklearn.manifold.locally_linear.LocallyLinearEmbedding", "sklearn.manifold.locally_linear.barycenter_kneighbors_graph",
+ "sklearn.manifold.locally_linear.barycenter_weights", "sklearn.manifold.locally_linear.locally_linear_embedding",
+ "sklearn.manifold.locally_linear.null_space", "sklearn.manifold.locally_linear_embedding",
+ "sklearn.manifold.mds.MDS", "sklearn.manifold.mds._smacof_single",
+ "sklearn.manifold.mds.smacof", "sklearn.manifold.smacof",
+ "sklearn.manifold.spectral_embedding", "sklearn.manifold.spectral_embedding_.SpectralEmbedding",
+ "sklearn.manifold.spectral_embedding_._graph_connected_component", "sklearn.manifold.spectral_embedding_._graph_is_connected",
+ "sklearn.manifold.spectral_embedding_._set_diag", "sklearn.manifold.spectral_embedding_.spectral_embedding",
+ "sklearn.manifold.t_sne.MACHINE_EPSILON", "sklearn.manifold.t_sne.TSNE",
+ "sklearn.manifold.t_sne._gradient_descent", "sklearn.manifold.t_sne._joint_probabilities",
+ "sklearn.manifold.t_sne._joint_probabilities_nn", "sklearn.manifold.t_sne._kl_divergence",
+ "sklearn.manifold.t_sne._kl_divergence_bh", "sklearn.manifold.t_sne.string_types",
+ "sklearn.manifold.t_sne.trustworthiness", "sklearn.metrics.SCORERS",
+ "sklearn.metrics.accuracy_score", "sklearn.metrics.adjusted_mutual_info_score",
+ "sklearn.metrics.adjusted_rand_score", "sklearn.metrics.auc",
+ "sklearn.metrics.average_precision_score", "sklearn.metrics.base._average_binary_score",
+ "sklearn.metrics.brier_score_loss", "sklearn.metrics.calinski_harabaz_score",
+ "sklearn.metrics.classification._check_binary_probabilistic_predictions", "sklearn.metrics.classification._check_targets",
+ "sklearn.metrics.classification._prf_divide", "sklearn.metrics.classification._weighted_sum",
+ "sklearn.metrics.classification.accuracy_score", "sklearn.metrics.classification.brier_score_loss",
+ "sklearn.metrics.classification.classification_report", "sklearn.metrics.classification.cohen_kappa_score",
+ "sklearn.metrics.classification.confusion_matrix", "sklearn.metrics.classification.f1_score",
+ "sklearn.metrics.classification.fbeta_score", "sklearn.metrics.classification.hamming_loss",
+ "sklearn.metrics.classification.hinge_loss", "sklearn.metrics.classification.jaccard_similarity_score",
+ "sklearn.metrics.classification.log_loss", "sklearn.metrics.classification.matthews_corrcoef",
+ "sklearn.metrics.classification.precision_recall_fscore_support", "sklearn.metrics.classification.precision_score",
+ "sklearn.metrics.classification.recall_score", "sklearn.metrics.classification.zero_one_loss",
+ "sklearn.metrics.classification_report", "sklearn.metrics.cluster.adjusted_mutual_info_score",
+ "sklearn.metrics.cluster.adjusted_rand_score", "sklearn.metrics.cluster.bicluster._check_rows_and_columns",
+ "sklearn.metrics.cluster.bicluster._jaccard", "sklearn.metrics.cluster.bicluster._pairwise_similarity",
+ "sklearn.metrics.cluster.bicluster.consensus_score", "sklearn.metrics.cluster.calinski_harabaz_score",
+ "sklearn.metrics.cluster.completeness_score", "sklearn.metrics.cluster.consensus_score",
+ "sklearn.metrics.cluster.contingency_matrix", "sklearn.metrics.cluster.entropy",
+ "sklearn.metrics.cluster.expected_mutual_info_fast.expected_mutual_information", "sklearn.metrics.cluster.expected_mutual_info_fast.gammaln",
+ "sklearn.metrics.cluster.expected_mutual_information", "sklearn.metrics.cluster.fowlkes_mallows_score",
+ "sklearn.metrics.cluster.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.homogeneity_score",
+ "sklearn.metrics.cluster.mutual_info_score", "sklearn.metrics.cluster.normalized_mutual_info_score",
+ "sklearn.metrics.cluster.silhouette_samples", "sklearn.metrics.cluster.silhouette_score",
+ "sklearn.metrics.cluster.supervised.adjusted_mutual_info_score", "sklearn.metrics.cluster.supervised.adjusted_rand_score",
+ "sklearn.metrics.cluster.supervised.check_clusterings", "sklearn.metrics.cluster.supervised.comb2",
+ "sklearn.metrics.cluster.supervised.completeness_score", "sklearn.metrics.cluster.supervised.contingency_matrix",
+ "sklearn.metrics.cluster.supervised.entropy", "sklearn.metrics.cluster.supervised.fowlkes_mallows_score",
+ "sklearn.metrics.cluster.supervised.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.supervised.homogeneity_score",
+ "sklearn.metrics.cluster.supervised.mutual_info_score", "sklearn.metrics.cluster.supervised.normalized_mutual_info_score",
+ "sklearn.metrics.cluster.supervised.v_measure_score", "sklearn.metrics.cluster.unsupervised.calinski_harabaz_score",
+ "sklearn.metrics.cluster.unsupervised.check_number_of_labels", "sklearn.metrics.cluster.unsupervised.silhouette_samples",
+ "sklearn.metrics.cluster.unsupervised.silhouette_score", "sklearn.metrics.cluster.v_measure_score",
+ "sklearn.metrics.cohen_kappa_score", "sklearn.metrics.completeness_score",
+ "sklearn.metrics.confusion_matrix", "sklearn.metrics.consensus_score",
+ "sklearn.metrics.coverage_error", "sklearn.metrics.euclidean_distances",
+ "sklearn.metrics.explained_variance_score", "sklearn.metrics.f1_score",
+ "sklearn.metrics.fbeta_score", "sklearn.metrics.fowlkes_mallows_score",
+ "sklearn.metrics.get_scorer", "sklearn.metrics.hamming_loss",
+ "sklearn.metrics.hinge_loss", "sklearn.metrics.homogeneity_completeness_v_measure",
+ "sklearn.metrics.homogeneity_score", "sklearn.metrics.jaccard_similarity_score",
+ "sklearn.metrics.label_ranking_average_precision_score", "sklearn.metrics.label_ranking_loss",
+ "sklearn.metrics.log_loss", "sklearn.metrics.make_scorer",
+ "sklearn.metrics.matthews_corrcoef", "sklearn.metrics.mean_absolute_error",
+ "sklearn.metrics.mean_squared_error", "sklearn.metrics.mean_squared_log_error",
+ "sklearn.metrics.median_absolute_error", "sklearn.metrics.mutual_info_score",
+ "sklearn.metrics.normalized_mutual_info_score", "sklearn.metrics.pairwise.KERNEL_PARAMS",
+ "sklearn.metrics.pairwise.PAIRED_DISTANCES", "sklearn.metrics.pairwise.PAIRWISE_BOOLEAN_FUNCTIONS",
+ "sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS", "sklearn.metrics.pairwise.PAIRWISE_KERNEL_FUNCTIONS",
+ "sklearn.metrics.pairwise._VALID_METRICS", "sklearn.metrics.pairwise._chi2_kernel_fast",
+ "sklearn.metrics.pairwise._pairwise_callable", "sklearn.metrics.pairwise._parallel_pairwise",
+ "sklearn.metrics.pairwise._return_float_dtype", "sklearn.metrics.pairwise._sparse_manhattan",
+ "sklearn.metrics.pairwise.additive_chi2_kernel", "sklearn.metrics.pairwise.check_paired_arrays",
+ "sklearn.metrics.pairwise.check_pairwise_arrays", "sklearn.metrics.pairwise.chi2_kernel",
+ "sklearn.metrics.pairwise.cosine_distances", "sklearn.metrics.pairwise.cosine_similarity",
+ "sklearn.metrics.pairwise.distance_metrics", "sklearn.metrics.pairwise.euclidean_distances",
+ "sklearn.metrics.pairwise.kernel_metrics", "sklearn.metrics.pairwise.laplacian_kernel",
+ "sklearn.metrics.pairwise.linear_kernel", "sklearn.metrics.pairwise.manhattan_distances",
+ "sklearn.metrics.pairwise.paired_cosine_distances", "sklearn.metrics.pairwise.paired_distances",
+ "sklearn.metrics.pairwise.paired_euclidean_distances", "sklearn.metrics.pairwise.paired_manhattan_distances",
+ "sklearn.metrics.pairwise.pairwise_distances", "sklearn.metrics.pairwise.pairwise_distances_argmin",
+ "sklearn.metrics.pairwise.pairwise_distances_argmin_min", "sklearn.metrics.pairwise.pairwise_kernels",
+ "sklearn.metrics.pairwise.polynomial_kernel", "sklearn.metrics.pairwise.rbf_kernel",
+ "sklearn.metrics.pairwise.sigmoid_kernel", "sklearn.metrics.pairwise_distances",
+ "sklearn.metrics.pairwise_distances_argmin", "sklearn.metrics.pairwise_distances_argmin_min",
+ "sklearn.metrics.pairwise_fast._chi2_kernel_fast", "sklearn.metrics.pairwise_fast._sparse_manhattan",
+ "sklearn.metrics.pairwise_kernels", "sklearn.metrics.precision_recall_curve",
+ "sklearn.metrics.precision_recall_fscore_support", "sklearn.metrics.precision_score",
+ "sklearn.metrics.r2_score", "sklearn.metrics.ranking._binary_clf_curve",
+ "sklearn.metrics.ranking.auc", "sklearn.metrics.ranking.average_precision_score",
+ "sklearn.metrics.ranking.coverage_error", "sklearn.metrics.ranking.label_ranking_average_precision_score",
+ "sklearn.metrics.ranking.label_ranking_loss", "sklearn.metrics.ranking.precision_recall_curve",
+ "sklearn.metrics.ranking.roc_auc_score", "sklearn.metrics.ranking.roc_curve",
+ "sklearn.metrics.recall_score", "sklearn.metrics.regression._check_reg_targets",
+ "sklearn.metrics.regression.explained_variance_score", "sklearn.metrics.regression.mean_absolute_error",
+ "sklearn.metrics.regression.mean_squared_error", "sklearn.metrics.regression.mean_squared_log_error",
+ "sklearn.metrics.regression.median_absolute_error", "sklearn.metrics.regression.r2_score",
+ "sklearn.metrics.regression.string_types", "sklearn.metrics.roc_auc_score",
+ "sklearn.metrics.roc_curve", "sklearn.metrics.scorer.SCORERS",
+ "sklearn.metrics.scorer._BaseScorer", "sklearn.metrics.scorer._PredictScorer",
+ "sklearn.metrics.scorer._ProbaScorer", "sklearn.metrics.scorer._ThresholdScorer",
+ "sklearn.metrics.scorer._check_multimetric_scoring", "sklearn.metrics.scorer._passthrough_scorer",
+ "sklearn.metrics.scorer.accuracy_scorer", "sklearn.metrics.scorer.adjusted_mutual_info_scorer",
+ "sklearn.metrics.scorer.adjusted_rand_scorer", "sklearn.metrics.scorer.average",
+ "sklearn.metrics.scorer.average_precision_scorer", "sklearn.metrics.scorer.check_scoring",
+ "sklearn.metrics.scorer.completeness_scorer", "sklearn.metrics.scorer.deprecation_msg",
+ "sklearn.metrics.scorer.explained_variance_scorer", "sklearn.metrics.scorer.f1_scorer",
+ "sklearn.metrics.scorer.fowlkes_mallows_scorer", "sklearn.metrics.scorer.get_scorer",
+ "sklearn.metrics.scorer.homogeneity_scorer", "sklearn.metrics.scorer.log_loss_scorer",
+ "sklearn.metrics.scorer.make_scorer", "sklearn.metrics.scorer.mean_absolute_error_scorer",
+ "sklearn.metrics.scorer.mean_squared_error_scorer", "sklearn.metrics.scorer.median_absolute_error_scorer",
+ "sklearn.metrics.scorer.mutual_info_scorer", "sklearn.metrics.scorer.name",
+ "sklearn.metrics.scorer.neg_log_loss_scorer", "sklearn.metrics.scorer.neg_mean_absolute_error_scorer",
+ "sklearn.metrics.scorer.neg_mean_squared_error_scorer", "sklearn.metrics.scorer.neg_mean_squared_log_error_scorer",
+ "sklearn.metrics.scorer.neg_median_absolute_error_scorer", "sklearn.metrics.scorer.normalized_mutual_info_scorer",
+ "sklearn.metrics.scorer.precision_scorer", "sklearn.metrics.scorer.qualified_name",
+ "sklearn.metrics.scorer.r2_scorer", "sklearn.metrics.scorer.recall_scorer",
+ "sklearn.metrics.scorer.roc_auc_scorer", "sklearn.metrics.scorer.v_measure_scorer",
+ "sklearn.metrics.silhouette_samples", "sklearn.metrics.silhouette_score",
+ "sklearn.metrics.v_measure_score", "sklearn.metrics.zero_one_loss",
+ "sklearn.model_selection.BaseCrossValidator", "sklearn.model_selection.GridSearchCV",
+ "sklearn.model_selection.GroupKFold", "sklearn.model_selection.GroupShuffleSplit",
+ "sklearn.model_selection.KFold", "sklearn.model_selection.LeaveOneGroupOut",
+ "sklearn.model_selection.LeaveOneOut", "sklearn.model_selection.LeavePGroupsOut",
+ "sklearn.model_selection.LeavePOut", "sklearn.model_selection.ParameterGrid",
+ "sklearn.model_selection.ParameterSampler", "sklearn.model_selection.PredefinedSplit",
+ "sklearn.model_selection.RandomizedSearchCV", "sklearn.model_selection.RepeatedKFold",
+ "sklearn.model_selection.RepeatedStratifiedKFold", "sklearn.model_selection.ShuffleSplit",
+ "sklearn.model_selection.StratifiedKFold", "sklearn.model_selection.StratifiedShuffleSplit",
+ "sklearn.model_selection.TimeSeriesSplit", "sklearn.model_selection._search.BaseSearchCV",
+ "sklearn.model_selection._search.GridSearchCV", "sklearn.model_selection._search.ParameterGrid",
+ "sklearn.model_selection._search.ParameterSampler", "sklearn.model_selection._search.RandomizedSearchCV",
+ "sklearn.model_selection._search._CVScoreTuple", "sklearn.model_selection._search._check_param_grid",
+ "sklearn.model_selection._search.fit_grid_point", "sklearn.model_selection._search.sp_version",
+ "sklearn.model_selection._split.BaseCrossValidator", "sklearn.model_selection._split.BaseShuffleSplit",
+ "sklearn.model_selection._split.GroupKFold", "sklearn.model_selection._split.GroupShuffleSplit",
+ "sklearn.model_selection._split.KFold", "sklearn.model_selection._split.LeaveOneGroupOut",
+ "sklearn.model_selection._split.LeaveOneOut", "sklearn.model_selection._split.LeavePGroupsOut",
+ "sklearn.model_selection._split.LeavePOut", "sklearn.model_selection._split.PredefinedSplit",
+ "sklearn.model_selection._split.RepeatedKFold", "sklearn.model_selection._split.RepeatedStratifiedKFold",
+ "sklearn.model_selection._split.ShuffleSplit", "sklearn.model_selection._split.StratifiedKFold",
+ "sklearn.model_selection._split.StratifiedShuffleSplit", "sklearn.model_selection._split.TimeSeriesSplit",
+ "sklearn.model_selection._split._BaseKFold", "sklearn.model_selection._split._CVIterableWrapper",
+ "sklearn.model_selection._split._RepeatedSplits", "sklearn.model_selection._split._approximate_mode",
+ "sklearn.model_selection._split._build_repr", "sklearn.model_selection._split._validate_shuffle_split",
+ "sklearn.model_selection._split._validate_shuffle_split_init", "sklearn.model_selection._split.check_cv",
+ "sklearn.model_selection._split.train_test_split", "sklearn.model_selection._validation._aggregate_score_dicts",
+ "sklearn.model_selection._validation._check_is_permutation", "sklearn.model_selection._validation._fit_and_predict",
+ "sklearn.model_selection._validation._fit_and_score", "sklearn.model_selection._validation._incremental_fit_estimator",
+ "sklearn.model_selection._validation._index_param_value", "sklearn.model_selection._validation._multimetric_score",
+ "sklearn.model_selection._validation._permutation_test_score", "sklearn.model_selection._validation._score",
+ "sklearn.model_selection._validation._shuffle", "sklearn.model_selection._validation._translate_train_sizes",
+ "sklearn.model_selection._validation.cross_val_predict", "sklearn.model_selection._validation.cross_val_score",
+ "sklearn.model_selection._validation.cross_validate", "sklearn.model_selection._validation.learning_curve",
+ "sklearn.model_selection._validation.permutation_test_score", "sklearn.model_selection._validation.validation_curve",
+ "sklearn.model_selection.check_cv", "sklearn.model_selection.cross_val_predict",
+ "sklearn.model_selection.cross_val_score", "sklearn.model_selection.cross_validate",
+ "sklearn.model_selection.fit_grid_point", "sklearn.model_selection.learning_curve",
+ "sklearn.model_selection.permutation_test_score", "sklearn.model_selection.train_test_split",
+ "sklearn.model_selection.validation_curve", "sklearn.multiclass.OneVsOneClassifier",
+ "sklearn.multiclass.OneVsRestClassifier", "sklearn.multiclass.OutputCodeClassifier",
+ "sklearn.multiclass._ConstantPredictor", "sklearn.multiclass._check_estimator",
+ "sklearn.multiclass._fit_binary", "sklearn.multiclass._fit_ovo_binary",
+ "sklearn.multiclass._partial_fit_binary", "sklearn.multiclass._partial_fit_ovo_binary",
+ "sklearn.multiclass._predict_binary", "sklearn.naive_bayes.BaseDiscreteNB",
+ "sklearn.naive_bayes.BaseNB", "sklearn.naive_bayes.BernoulliNB",
+ "sklearn.naive_bayes.GaussianNB", "sklearn.naive_bayes.MultinomialNB",
+ "sklearn.naive_bayes._ALPHA_MIN", "sklearn.neighbors.BallTree",
+ "sklearn.neighbors.DistanceMetric", "sklearn.neighbors.KDTree",
+ "sklearn.neighbors.KNeighborsClassifier", "sklearn.neighbors.KNeighborsRegressor",
+ "sklearn.neighbors.KernelDensity", "sklearn.neighbors.LSHForest",
+ "sklearn.neighbors.LocalOutlierFactor", "sklearn.neighbors.NearestCentroid",
+ "sklearn.neighbors.NearestNeighbors", "sklearn.neighbors.RadiusNeighborsClassifier",
+ "sklearn.neighbors.RadiusNeighborsRegressor", "sklearn.neighbors.approximate.GaussianRandomProjectionHash",
+ "sklearn.neighbors.approximate.HASH_DTYPE", "sklearn.neighbors.approximate.LSHForest",
+ "sklearn.neighbors.approximate.MAX_HASH_SIZE", "sklearn.neighbors.approximate.ProjectionToHashMixin",
+ "sklearn.neighbors.approximate._array_of_arrays", "sklearn.neighbors.approximate._find_longest_prefix_match",
+ "sklearn.neighbors.approximate._find_matching_indices", "sklearn.neighbors.ball_tree.BallTree",
+ "sklearn.neighbors.ball_tree.BinaryTree", "sklearn.neighbors.ball_tree.CLASS_DOC",
+ "sklearn.neighbors.ball_tree.DOC_DICT", "sklearn.neighbors.ball_tree.NeighborsHeap",
+ "sklearn.neighbors.ball_tree.NodeData", "sklearn.neighbors.ball_tree.NodeHeap",
+ "sklearn.neighbors.ball_tree.NodeHeapData", "sklearn.neighbors.ball_tree.VALID_METRICS",
+ "sklearn.neighbors.ball_tree.VALID_METRIC_IDS", "sklearn.neighbors.ball_tree.kernel_norm",
+ "sklearn.neighbors.ball_tree.load_heap", "sklearn.neighbors.ball_tree.newObj",
+ "sklearn.neighbors.ball_tree.nodeheap_sort", "sklearn.neighbors.ball_tree.offsets",
+ "sklearn.neighbors.ball_tree.simultaneous_sort", "sklearn.neighbors.base.KNeighborsMixin",
+ "sklearn.neighbors.base.NeighborsBase", "sklearn.neighbors.base.PAIRWISE_DISTANCE_FUNCTIONS",
+ "sklearn.neighbors.base.RadiusNeighborsMixin", "sklearn.neighbors.base.SupervisedFloatMixin",
+ "sklearn.neighbors.base.SupervisedIntegerMixin", "sklearn.neighbors.base.UnsupervisedMixin",
+ "sklearn.neighbors.base.VALID_METRICS", "sklearn.neighbors.base.VALID_METRICS_SPARSE",
+ "sklearn.neighbors.base._check_weights", "sklearn.neighbors.base._get_weights",
+ "sklearn.neighbors.classification.KNeighborsClassifier", "sklearn.neighbors.classification.RadiusNeighborsClassifier",
+ "sklearn.neighbors.dist_metrics.BrayCurtisDistance", "sklearn.neighbors.dist_metrics.CanberraDistance",
+ "sklearn.neighbors.dist_metrics.ChebyshevDistance", "sklearn.neighbors.dist_metrics.DiceDistance",
+ "sklearn.neighbors.dist_metrics.DistanceMetric", "sklearn.neighbors.dist_metrics.EuclideanDistance",
+ "sklearn.neighbors.dist_metrics.HammingDistance", "sklearn.neighbors.dist_metrics.HaversineDistance",
+ "sklearn.neighbors.dist_metrics.JaccardDistance", "sklearn.neighbors.dist_metrics.KulsinskiDistance",
+ "sklearn.neighbors.dist_metrics.METRIC_MAPPING", "sklearn.neighbors.dist_metrics.MahalanobisDistance",
+ "sklearn.neighbors.dist_metrics.ManhattanDistance", "sklearn.neighbors.dist_metrics.MatchingDistance",
+ "sklearn.neighbors.dist_metrics.MinkowskiDistance", "sklearn.neighbors.dist_metrics.PyFuncDistance",
+ "sklearn.neighbors.dist_metrics.RogersTanimotoDistance", "sklearn.neighbors.dist_metrics.RussellRaoDistance",
+ "sklearn.neighbors.dist_metrics.SEuclideanDistance", "sklearn.neighbors.dist_metrics.SokalMichenerDistance",
+ "sklearn.neighbors.dist_metrics.SokalSneathDistance", "sklearn.neighbors.dist_metrics.WMinkowskiDistance",
+ "sklearn.neighbors.dist_metrics.get_valid_metric_ids", "sklearn.neighbors.dist_metrics.newObj",
+ "sklearn.neighbors.graph._check_params", "sklearn.neighbors.graph._query_include_self",
+ "sklearn.neighbors.graph.kneighbors_graph", "sklearn.neighbors.graph.radius_neighbors_graph",
+ "sklearn.neighbors.kd_tree.BinaryTree", "sklearn.neighbors.kd_tree.CLASS_DOC",
+ "sklearn.neighbors.kd_tree.DOC_DICT", "sklearn.neighbors.kd_tree.KDTree",
+ "sklearn.neighbors.kd_tree.NeighborsHeap", "sklearn.neighbors.kd_tree.NodeData",
+ "sklearn.neighbors.kd_tree.NodeHeap", "sklearn.neighbors.kd_tree.NodeHeapData",
+ "sklearn.neighbors.kd_tree.VALID_METRICS", "sklearn.neighbors.kd_tree.VALID_METRIC_IDS",
+ "sklearn.neighbors.kd_tree.kernel_norm", "sklearn.neighbors.kd_tree.load_heap",
+ "sklearn.neighbors.kd_tree.newObj", "sklearn.neighbors.kd_tree.nodeheap_sort",
+ "sklearn.neighbors.kd_tree.offsets", "sklearn.neighbors.kd_tree.simultaneous_sort",
+ "sklearn.neighbors.kde.KernelDensity", "sklearn.neighbors.kde.TREE_DICT",
+ "sklearn.neighbors.kde.VALID_KERNELS", "sklearn.neighbors.kde.gammainc",
+ "sklearn.neighbors.kneighbors_graph", "sklearn.neighbors.lof.LocalOutlierFactor",
+ "sklearn.neighbors.nearest_centroid.NearestCentroid", "sklearn.neighbors.quad_tree.CELL_DTYPE",
+ "sklearn.neighbors.quad_tree._QuadTree", "sklearn.neighbors.radius_neighbors_graph",
+ "sklearn.neighbors.regression.KNeighborsRegressor", "sklearn.neighbors.regression.RadiusNeighborsRegressor",
+ "sklearn.neighbors.unsupervised.NearestNeighbors", "sklearn.pipeline.FeatureUnion",
+ "sklearn.pipeline.Pipeline", "sklearn.pipeline._fit_one_transformer",
+ "sklearn.pipeline._fit_transform_one", "sklearn.pipeline._name_estimators",
+ "sklearn.pipeline._transform_one", "sklearn.pipeline.make_pipeline",
+ "sklearn.pipeline.make_union", "sklearn.preprocessing.Binarizer",
+ "sklearn.preprocessing.FunctionTransformer", "sklearn.preprocessing.Imputer",
+ "sklearn.preprocessing.KernelCenterer", "sklearn.preprocessing.LabelBinarizer",
+ "sklearn.preprocessing.LabelEncoder", "sklearn.preprocessing.MaxAbsScaler",
+ "sklearn.preprocessing.MinMaxScaler", "sklearn.preprocessing.MultiLabelBinarizer",
+ "sklearn.preprocessing.Normalizer", "sklearn.preprocessing.OneHotEncoder",
+ "sklearn.preprocessing.PolynomialFeatures", "sklearn.preprocessing.QuantileTransformer",
+ "sklearn.preprocessing.RobustScaler", "sklearn.preprocessing.StandardScaler",
+ "sklearn.preprocessing._function_transformer.FunctionTransformer", "sklearn.preprocessing._function_transformer._identity",
+ "sklearn.preprocessing._function_transformer.string_types", "sklearn.preprocessing.add_dummy_feature",
+ "sklearn.preprocessing.binarize", "sklearn.preprocessing.data.BOUNDS_THRESHOLD",
+ "sklearn.preprocessing.data.Binarizer", "sklearn.preprocessing.data.FLOAT_DTYPES",
+ "sklearn.preprocessing.data.KernelCenterer", "sklearn.preprocessing.data.MaxAbsScaler",
+ "sklearn.preprocessing.data.MinMaxScaler", "sklearn.preprocessing.data.Normalizer",
+ "sklearn.preprocessing.data.OneHotEncoder", "sklearn.preprocessing.data.PolynomialFeatures",
+ "sklearn.preprocessing.data.QuantileTransformer", "sklearn.preprocessing.data.RobustScaler",
+ "sklearn.preprocessing.data.StandardScaler", "sklearn.preprocessing.data._handle_zeros_in_scale",
+ "sklearn.preprocessing.data._transform_selected", "sklearn.preprocessing.data.add_dummy_feature",
+ "sklearn.preprocessing.data.binarize", "sklearn.preprocessing.data.maxabs_scale",
+ "sklearn.preprocessing.data.minmax_scale", "sklearn.preprocessing.data.normalize",
+ "sklearn.preprocessing.data.quantile_transform", "sklearn.preprocessing.data.robust_scale",
+ "sklearn.preprocessing.data.scale", "sklearn.preprocessing.data.string_types",
+ "sklearn.preprocessing.imputation.FLOAT_DTYPES", "sklearn.preprocessing.imputation.Imputer",
+ "sklearn.preprocessing.imputation._get_mask", "sklearn.preprocessing.imputation._most_frequent",
+ "sklearn.preprocessing.label.LabelBinarizer", "sklearn.preprocessing.label.LabelEncoder",
+ "sklearn.preprocessing.label.MultiLabelBinarizer", "sklearn.preprocessing.label._inverse_binarize_multiclass",
+ "sklearn.preprocessing.label._inverse_binarize_thresholding", "sklearn.preprocessing.label.label_binarize",
+ "sklearn.preprocessing.label_binarize", "sklearn.preprocessing.maxabs_scale",
+ "sklearn.preprocessing.minmax_scale", "sklearn.preprocessing.normalize",
+ "sklearn.preprocessing.quantile_transform", "sklearn.preprocessing.robust_scale",
+ "sklearn.preprocessing.scale", "sklearn.random_projection.BaseRandomProjection",
+ "sklearn.random_projection.GaussianRandomProjection", "sklearn.random_projection.SparseRandomProjection",
+ "sklearn.random_projection._check_density", "sklearn.random_projection._check_input_size",
+ "sklearn.random_projection.gaussian_random_matrix", "sklearn.random_projection.johnson_lindenstrauss_min_dim",
+ "sklearn.random_projection.sparse_random_matrix", "sklearn.set_config",
+ "sklearn.setup_module", "sklearn.svm.LinearSVC",
+ "sklearn.svm.LinearSVR", "sklearn.svm.NuSVC",
+ "sklearn.svm.NuSVR", "sklearn.svm.OneClassSVM",
+ "sklearn.svm.SVC", "sklearn.svm.SVR",
+ "sklearn.svm.base.BaseLibSVM", "sklearn.svm.base.BaseSVC",
+ "sklearn.svm.base.LIBSVM_IMPL", "sklearn.svm.base._fit_liblinear",
+ "sklearn.svm.base._get_liblinear_solver_type", "sklearn.svm.base._one_vs_one_coef",
+ "sklearn.svm.bounds.l1_min_c", "sklearn.svm.classes.LinearSVC",
+ "sklearn.svm.classes.LinearSVR", "sklearn.svm.classes.NuSVC",
+ "sklearn.svm.classes.NuSVR", "sklearn.svm.classes.OneClassSVM",
+ "sklearn.svm.classes.SVC", "sklearn.svm.classes.SVR",
+ "sklearn.svm.l1_min_c", "sklearn.svm.liblinear.set_verbosity_wrap",
+ "sklearn.svm.liblinear.train_wrap", "sklearn.svm.libsvm.LIBSVM_KERNEL_TYPES",
+ "sklearn.svm.libsvm.cross_validation", "sklearn.svm.libsvm.decision_function",
+ "sklearn.svm.libsvm.fit", "sklearn.svm.libsvm.predict",
+ "sklearn.svm.libsvm.predict_proba", "sklearn.svm.libsvm.set_verbosity_wrap",
+ "sklearn.svm.libsvm_sparse.libsvm_sparse_decision_function", "sklearn.svm.libsvm_sparse.libsvm_sparse_predict",
+ "sklearn.svm.libsvm_sparse.libsvm_sparse_predict_proba", "sklearn.svm.libsvm_sparse.libsvm_sparse_train",
+ "sklearn.svm.libsvm_sparse.set_verbosity_wrap", "sklearn.tree.DecisionTreeClassifier",
+ "sklearn.tree.DecisionTreeRegressor", "sklearn.tree.ExtraTreeClassifier",
+ "sklearn.tree.ExtraTreeRegressor", "sklearn.tree._criterion.ClassificationCriterion",
+ "sklearn.tree._criterion.Criterion", "sklearn.tree._criterion.Entropy",
+ "sklearn.tree._criterion.FriedmanMSE", "sklearn.tree._criterion.Gini",
+ "sklearn.tree._criterion.MAE", "sklearn.tree._criterion.MSE",
+ "sklearn.tree._criterion.RegressionCriterion", "sklearn.tree._splitter.BaseDenseSplitter",
+ "sklearn.tree._splitter.BaseSparseSplitter", "sklearn.tree._splitter.BestSparseSplitter",
+ "sklearn.tree._splitter.BestSplitter", "sklearn.tree._splitter.RandomSparseSplitter",
+ "sklearn.tree._splitter.RandomSplitter", "sklearn.tree._splitter.Splitter",
+ "sklearn.tree._tree.BestFirstTreeBuilder", "sklearn.tree._tree.DepthFirstTreeBuilder",
+ "sklearn.tree._tree.NODE_DTYPE", "sklearn.tree._tree.TREE_LEAF",
+ "sklearn.tree._tree.TREE_UNDEFINED", "sklearn.tree._tree.Tree",
+ "sklearn.tree._tree.TreeBuilder", "sklearn.tree._utils.PriorityHeap",
+ "sklearn.tree._utils.Stack", "sklearn.tree._utils.WeightedMedianCalculator",
+ "sklearn.tree._utils.WeightedPQueue", "sklearn.tree._utils._realloc_test",
+ "sklearn.tree.export.SENTINEL", "sklearn.tree.export.Sentinel",
+ "sklearn.tree.export._color_brew", "sklearn.tree.export.export_graphviz",
+ "sklearn.tree.export_graphviz", "sklearn.tree.tree.BaseDecisionTree",
+ "sklearn.tree.tree.CRITERIA_CLF", "sklearn.tree.tree.CRITERIA_REG",
+ "sklearn.tree.tree.DENSE_SPLITTERS", "sklearn.tree.tree.DecisionTreeClassifier",
+ "sklearn.tree.tree.DecisionTreeRegressor", "sklearn.tree.tree.ExtraTreeClassifier",
+ "sklearn.tree.tree.ExtraTreeRegressor", "sklearn.tree.tree.SPARSE_SPLITTERS",
+ "sklearn.utils.Bunch", "sklearn.utils._get_n_jobs",
+ "sklearn.utils._logistic_sigmoid._log_logistic_sigmoid", "sklearn.utils._random._sample_without_replacement_check_input",
+ "sklearn.utils._random._sample_without_replacement_with_pool", "sklearn.utils._random._sample_without_replacement_with_reservoir_sampling",
+ "sklearn.utils._random._sample_without_replacement_with_tracking_selection", "sklearn.utils._random.sample_without_replacement",
+ "sklearn.utils.arrayfuncs.cholesky_delete", "sklearn.utils.arrayfuncs.min_pos",
+ "sklearn.utils.as_float_array", "sklearn.utils.assert_all_finite",
+ "sklearn.utils.axis0_safe_slice", "sklearn.utils.check_X_y",
+ "sklearn.utils.check_array", "sklearn.utils.check_consistent_length",
+ "sklearn.utils.check_random_state", "sklearn.utils.check_symmetric",
+ "sklearn.utils.class_weight.compute_class_weight", "sklearn.utils.class_weight.compute_sample_weight",
+ "sklearn.utils.column_or_1d", "sklearn.utils.compute_class_weight",
+ "sklearn.utils.compute_sample_weight", "sklearn.utils.deprecated",
+ "sklearn.utils.deprecation.DeprecationDict", "sklearn.utils.deprecation._is_deprecated",
+ "sklearn.utils.deprecation.deprecated", "sklearn.utils.extmath._deterministic_vector_sign_flip",
+ "sklearn.utils.extmath._impose_f_order", "sklearn.utils.extmath._incremental_mean_and_var",
+ "sklearn.utils.extmath.cartesian", "sklearn.utils.extmath.density",
+ "sklearn.utils.extmath.fast_dot", "sklearn.utils.extmath.fast_logdet",
+ "sklearn.utils.extmath.log_logistic", "sklearn.utils.extmath.logsumexp",
+ "sklearn.utils.extmath.make_nonnegative", "sklearn.utils.extmath.norm",
+ "sklearn.utils.extmath.np_version", "sklearn.utils.extmath.pinvh",
+ "sklearn.utils.extmath.randomized_range_finder", "sklearn.utils.extmath.randomized_svd",
+ "sklearn.utils.extmath.row_norms", "sklearn.utils.extmath.safe_min",
+ "sklearn.utils.extmath.safe_sparse_dot", "sklearn.utils.extmath.softmax",
+ "sklearn.utils.extmath.squared_norm", "sklearn.utils.extmath.stable_cumsum",
+ "sklearn.utils.extmath.svd_flip", "sklearn.utils.extmath.weighted_mode",
+ "sklearn.utils.fast_dict.IntFloatDict", "sklearn.utils.fast_dict.argmin",
+ "sklearn.utils.fixes._parse_version", "sklearn.utils.fixes.divide",
+ "sklearn.utils.fixes.euler_gamma", "sklearn.utils.fixes.makedirs",
+ "sklearn.utils.fixes.np_version", "sklearn.utils.fixes.parallel_helper",
+ "sklearn.utils.fixes.sp_version", "sklearn.utils.fixes.sparse_min_max",
+ "sklearn.utils.gen_batches", "sklearn.utils.gen_even_slices",
+ "sklearn.utils.graph.connected_components", "sklearn.utils.graph.graph_laplacian",
+ "sklearn.utils.graph.graph_shortest_path", "sklearn.utils.graph.single_source_shortest_path_length",
+ "sklearn.utils.graph_shortest_path.graph_shortest_path", "sklearn.utils.indexable",
+ "sklearn.utils.indices_to_mask", "sklearn.utils.linear_assignment_._HungarianState",
+ "sklearn.utils.linear_assignment_._hungarian", "sklearn.utils.linear_assignment_._step1",
+ "sklearn.utils.linear_assignment_._step3", "sklearn.utils.linear_assignment_._step4",
+ "sklearn.utils.linear_assignment_._step5", "sklearn.utils.linear_assignment_._step6",
+ "sklearn.utils.linear_assignment_.linear_assignment", "sklearn.utils.metaestimators._BaseComposition",
+ "sklearn.utils.metaestimators._IffHasAttrDescriptor", "sklearn.utils.metaestimators._safe_split",
+ "sklearn.utils.metaestimators.if_delegate_has_method", "sklearn.utils.multiclass._FN_UNIQUE_LABELS",
+ "sklearn.utils.multiclass._check_partial_fit_first_call", "sklearn.utils.multiclass._is_integral_float",
+ "sklearn.utils.multiclass._ovr_decision_function", "sklearn.utils.multiclass._unique_indicator",
+ "sklearn.utils.multiclass._unique_multiclass", "sklearn.utils.multiclass.check_classification_targets",
+ "sklearn.utils.multiclass.class_distribution", "sklearn.utils.multiclass.is_multilabel",
+ "sklearn.utils.multiclass.string_types", "sklearn.utils.multiclass.type_of_target",
+ "sklearn.utils.multiclass.unique_labels", "sklearn.utils.murmurhash.murmurhash3_32",
+ "sklearn.utils.murmurhash.murmurhash3_bytes_array_s32", "sklearn.utils.murmurhash.murmurhash3_bytes_array_u32",
+ "sklearn.utils.murmurhash.murmurhash3_bytes_s32", "sklearn.utils.murmurhash.murmurhash3_bytes_u32",
+ "sklearn.utils.murmurhash.murmurhash3_int_s32", "sklearn.utils.murmurhash.murmurhash3_int_u32",
+ "sklearn.utils.murmurhash3_32", "sklearn.utils.optimize._LineSearchError",
+ "sklearn.utils.optimize._cg", "sklearn.utils.optimize._line_search_wolfe12",
+ "sklearn.utils.optimize.newton_cg", "sklearn.utils.random.choice",
+ "sklearn.utils.random.random_choice_csc", "sklearn.utils.resample",
+ "sklearn.utils.safe_indexing", "sklearn.utils.safe_mask",
+ "sklearn.utils.safe_sqr", "sklearn.utils.seq_dataset.ArrayDataset",
+ "sklearn.utils.seq_dataset.CSRDataset", "sklearn.utils.seq_dataset.SequentialDataset",
+ "sklearn.utils.shuffle", "sklearn.utils.sparsefuncs._csc_mean_var_axis0",
+ "sklearn.utils.sparsefuncs._csr_mean_var_axis0", "sklearn.utils.sparsefuncs._get_elem_at_rank",
+ "sklearn.utils.sparsefuncs._get_median", "sklearn.utils.sparsefuncs._incr_mean_var_axis0",
+ "sklearn.utils.sparsefuncs._raise_error_wrong_axis", "sklearn.utils.sparsefuncs._raise_typeerror",
+ "sklearn.utils.sparsefuncs.count_nonzero", "sklearn.utils.sparsefuncs.csc_median_axis_0",
+ "sklearn.utils.sparsefuncs.incr_mean_variance_axis", "sklearn.utils.sparsefuncs.inplace_column_scale",
+ "sklearn.utils.sparsefuncs.inplace_csr_column_scale", "sklearn.utils.sparsefuncs.inplace_csr_row_scale",
+ "sklearn.utils.sparsefuncs.inplace_row_scale", "sklearn.utils.sparsefuncs.inplace_swap_column",
+ "sklearn.utils.sparsefuncs.inplace_swap_row", "sklearn.utils.sparsefuncs.inplace_swap_row_csc",
+ "sklearn.utils.sparsefuncs.inplace_swap_row_csr", "sklearn.utils.sparsefuncs.mean_variance_axis",
+ "sklearn.utils.sparsefuncs.min_max_axis", "sklearn.utils.sparsefuncs_fast._csc_mean_variance_axis0",
+ "sklearn.utils.sparsefuncs_fast._csr_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast._csr_row_norms",
+ "sklearn.utils.sparsefuncs_fast._incr_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast._inplace_csr_row_normalize_l1",
+ "sklearn.utils.sparsefuncs_fast._inplace_csr_row_normalize_l2", "sklearn.utils.sparsefuncs_fast.assign_rows_csr",
+ "sklearn.utils.sparsefuncs_fast.csc_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast.csr_mean_variance_axis0",
+ "sklearn.utils.sparsefuncs_fast.csr_row_norms", "sklearn.utils.sparsefuncs_fast.incr_mean_variance_axis0",
+ "sklearn.utils.sparsefuncs_fast.inplace_csr_row_normalize_l1", "sklearn.utils.sparsefuncs_fast.inplace_csr_row_normalize_l2",
+ "sklearn.utils.stats._weighted_percentile", "sklearn.utils.stats.rankdata",
+ "sklearn.utils.tosequence", "sklearn.utils.validation.FLOAT_DTYPES",
+ "sklearn.utils.validation._assert_all_finite", "sklearn.utils.validation._ensure_sparse_format",
+ "sklearn.utils.validation._is_arraylike", "sklearn.utils.validation._num_samples",
+ "sklearn.utils.validation._shape_repr", "sklearn.utils.validation.as_float_array",
+ "sklearn.utils.validation.assert_all_finite", "sklearn.utils.validation.check_X_y",
+ "sklearn.utils.validation.check_array", "sklearn.utils.validation.check_consistent_length",
+ "sklearn.utils.validation.check_is_fitted", "sklearn.utils.validation.check_memory",
+ "sklearn.utils.validation.check_non_negative", "sklearn.utils.validation.check_random_state",
+ "sklearn.utils.validation.check_symmetric", "sklearn.utils.validation.column_or_1d",
+ "sklearn.utils.validation.has_fit_parameter", "sklearn.utils.validation.indexable",
+ "sklearn.utils.weight_vector.WeightVector"
+],
+
+ "SKR_NAMES": [
+ "skrebate.MultiSURF", "skrebate.MultiSURFstar",
+ "skrebate.ReliefF", "skrebate.SURF",
+ "skrebate.SURFstar", "skrebate.TuRF",
+ "skrebate.multisurf.MultiSURF", "skrebate.multisurfstar.MultiSURFstar",
+ "skrebate.relieff.ReliefF", "skrebate.scoring_utils.MultiSURF_compute_scores",
+ "skrebate.scoring_utils.MultiSURFstar_compute_scores", "skrebate.scoring_utils.ReliefF_compute_scores",
+ "skrebate.scoring_utils.SURF_compute_scores", "skrebate.scoring_utils.SURFstar_compute_scores",
+ "skrebate.scoring_utils.compute_score", "skrebate.scoring_utils.get_row_missing",
+ "skrebate.scoring_utils.ramp_function", "skrebate.surf.SURF",
+ "skrebate.surfstar.SURFstar", "skrebate.turf.TuRF"
+ ],
+
+ "XGB_NAMES": [
+ "xgboost.Booster", "xgboost.DMatrix",
+ "xgboost.VERSION_FILE", "xgboost.XGBClassifier",
+ "xgboost.XGBModel", "xgboost.XGBRegressor",
+ "xgboost.callback._fmt_metric", "xgboost.callback._get_callback_context",
+ "xgboost.callback.early_stop", "xgboost.callback.print_evaluation",
+ "xgboost.callback.record_evaluation", "xgboost.callback.reset_learning_rate",
+ "xgboost.compat.PANDAS_INSTALLED", "xgboost.compat.PY3",
+ "xgboost.compat.SKLEARN_INSTALLED", "xgboost.compat.STRING_TYPES",
+ "xgboost.compat.py_str", "xgboost.core.Booster",
+ "xgboost.core.CallbackEnv", "xgboost.core.DMatrix",
+ "xgboost.core.EarlyStopException", "xgboost.core.PANDAS_DTYPE_MAPPER",
+ "xgboost.core.PANDAS_INSTALLED", "xgboost.core.PY3",
+ "xgboost.core.STRING_TYPES", "xgboost.core.XGBoostError",
+ "xgboost.core._check_call", "xgboost.core._load_lib",
+ "xgboost.core._maybe_pandas_data", "xgboost.core._maybe_pandas_label",
+ "xgboost.core.c_array", "xgboost.core.c_str",
+ "xgboost.core.ctypes2buffer", "xgboost.core.ctypes2numpy",
+ "xgboost.core.from_cstr_to_pystr", "xgboost.core.from_pystr_to_cstr",
+ "xgboost.cv", "xgboost.f",
+ "xgboost.libpath.XGBoostLibraryNotFound", "xgboost.libpath.find_lib_path",
+ "xgboost.plot_importance", "xgboost.plot_tree",
+ "xgboost.plotting._EDGEPAT", "xgboost.plotting._EDGEPAT2",
+ "xgboost.plotting._LEAFPAT", "xgboost.plotting._NODEPAT",
+ "xgboost.plotting._parse_edge", "xgboost.plotting._parse_node",
+ "xgboost.plotting.plot_importance", "xgboost.plotting.plot_tree",
+ "xgboost.plotting.to_graphviz", "xgboost.rabit.DTYPE_ENUM__",
+ "xgboost.rabit.STRING_TYPES", "xgboost.rabit._init_rabit",
+ "xgboost.rabit.allreduce", "xgboost.rabit.broadcast",
+ "xgboost.rabit.finalize", "xgboost.rabit.get_processor_name",
+ "xgboost.rabit.get_rank", "xgboost.rabit.get_world_size",
+ "xgboost.rabit.init", "xgboost.rabit.tracker_print",
+ "xgboost.rabit.version_number", "xgboost.sklearn.SKLEARN_INSTALLED",
+ "xgboost.sklearn.XGBClassifier", "xgboost.sklearn.XGBModel",
+ "xgboost.sklearn.XGBRegressor", "xgboost.sklearn._objective_decorator",
+ "xgboost.to_graphviz", "xgboost.train",
+ "xgboost.training.CVPack", "xgboost.training.SKLEARN_INSTALLED",
+ "xgboost.training.STRING_TYPES", "xgboost.training._train_internal",
+ "xgboost.training.aggcv", "xgboost.training.cv",
+ "xgboost.training.mknfold", "xgboost.training.train"
+ ],
+
+
+ "NUMPY_NAMES": [
+ "numpy.core.multiarray._reconstruct", "numpy.ndarray",
+ "numpy.dtype", "numpy.core.multiarray.scalar", "numpy.random.__RandomState_ctor",
+ "numpy.ma.core._mareconstruct", "numpy.ma.core.MaskedArray"
+ ],
+
+ "IMBLEARN_NAMES":[
+ "imblearn.pipeline.Pipeline", "imblearn.over_sampling._random_over_sampler.RandomOverSampler",
+ "imblearn.under_sampling._prototype_selection._edited_nearest_neighbours.EditedNearestNeighbours"
+ ],
+
+ "MLXTEND_NAMES":[
+ "mlxtend.classifier.stacking_cv_classification.StackingCVClassifier",
+ "mlxtend.classifier.stacking_classification.StackingClassifier",
+ "mlxtend.regressor.stacking_cv_regression.StackingCVRegressor",
+ "mlxtend.regressor.stacking_regression.StackingRegressor"
+ ]
+}
\ No newline at end of file
diff -r 39ae276e75d9 -r e94395c672bd preprocessors.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocessors.py Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,184 @@
+"""
+Z_RandomOverSampler
+"""
+
+import imblearn
+import numpy as np
+
+from collections import Counter
+from imblearn.over_sampling.base import BaseOverSampler
+from imblearn.over_sampling import RandomOverSampler
+from imblearn.pipeline import Pipeline as imbPipeline
+from imblearn.utils import check_target_type
+from scipy import sparse
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.preprocessing.data import _handle_zeros_in_scale
+from sklearn.utils import check_array, safe_indexing
+from sklearn.utils.fixes import nanpercentile
+from sklearn.utils.validation import (check_is_fitted, check_X_y,
+ FLOAT_DTYPES)
+
+
+class Z_RandomOverSampler(BaseOverSampler):
+    """Random over-sampler driven by z-score classes of a continuous target.
+
+    `y` is standardized; samples with z > `negative_thres` get label 0,
+    those with z <= `positive_thres` label 1, the rest -1. The indices a
+    `RandomOverSampler` picks on these labels resample `X` and the raw `y`.
+    """
+
+    def __init__(self, sampling_strategy='auto', return_indices=False,
+                 random_state=None, ratio=None,
+                 negative_thres=0, positive_thres=-1):
+        super(Z_RandomOverSampler, self).__init__(
+            sampling_strategy=sampling_strategy, ratio=ratio)
+        self.random_state = random_state
+        self.return_indices = return_indices
+        self.negative_thres = negative_thres
+        self.positive_thres = positive_thres
+
+    @staticmethod
+    def _check_X_y(X, y):
+        y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'], dtype=None)
+        return X, y, binarize_y
+
+    def _fit_resample(self, X, y):
+        """Resample `X` and `y`; also return indices if `return_indices`."""
+        # convert y to z_score
+        y_z = (y - y.mean()) / y.std()
+
+        # Classify with boolean masks: one vectorized pass each, instead of
+        # a per-sample membership scan over index arrays.
+        negative = y_z > self.negative_thres
+        positive = y_z <= self.positive_thres
+        unclassified = ~(negative | positive)
+
+        # positive is assigned last, so it wins if the thresholds overlap
+        y_z[negative] = 0
+        y_z[positive] = 1
+        y_z[unclassified] = -1
+
+        ros = RandomOverSampler(
+            sampling_strategy=self.sampling_strategy,
+            random_state=self.random_state,
+            ratio=self.ratio)
+        ros.fit_resample(X, y_z)
+        sample_indices = ros.sample_indices_
+
+        print("Before sampler: %s. Total after: %s"
+              % (Counter(y_z), sample_indices.shape))
+        self.sample_indices_ = np.array(sample_indices)
+
+        resampled = (safe_indexing(X, sample_indices),
+                     safe_indexing(y, sample_indices))
+        if self.return_indices:
+            return resampled + (sample_indices,)
+        return resampled
+
+
+def _get_quantiles(X, quantile_range):
+    """Calculate column percentiles for 2d array
+
+    Parameters
+    ----------
+    X : array-like or sparse matrix, shape [n_samples, n_features]
+    quantile_range : percentile(s) forwarded to `nanpercentile`
+
+    Returns the per-feature percentiles with features on the last axis.
+    """
+    if sparse.issparse(X):
+        X = X.tocsc()  # `indptr` delimits columns only in CSC layout
+    quantiles = []
+    for feature_idx in range(X.shape[1]):
+        if sparse.issparse(X):
+            column_data = np.zeros(shape=X.shape[0], dtype=X.dtype)
+            nnz = X.data[X.indptr[feature_idx]: X.indptr[feature_idx + 1]]
+            column_data[:len(nnz)] = nnz
+        else:
+            column_data = X[:, feature_idx]
+        quantiles.append(nanpercentile(column_data, quantile_range))
+    return np.transpose(quantiles)
+
+
+class TDMScaler(BaseEstimator, TransformerMixin):
+    """
+    Scale features using Training Distribution Matching (TDM) algorithm
+
+    Parameters
+    ----------
+    q_lower, q_upper : float
+        Lower and upper percentiles, 0 <= q_lower <= q_upper <= 100.
+
+    References
+    ----------
+    .. [1] Thompson JA, Tan J and Greene CS (2016) Cross-platform
+           normalization of microarray and RNA-seq data for machine
+           learning applications. PeerJ 4, e1621.
+    """
+
+    def __init__(self, q_lower=25.0, q_upper=75.0):
+        self.q_lower = q_lower
+        self.q_upper = q_upper
+
+    def fit(self, X, y=None):
+        """
+        Store the training percentiles, their range and the data min/max
+
+        Parameters
+        ----------
+        X : array-like, shape [n_samples, n_features]
+        """
+        X = check_array(X, copy=True, estimator=self, dtype=FLOAT_DTYPES,
+                        force_all_finite=True)
+
+        if not 0 <= self.q_lower <= self.q_upper <= 100:
+            raise ValueError("Invalid quantile parameter values: "
+                             "q_lower %s, q_upper: %s"
+                             % (str(self.q_lower), str(self.q_upper)))
+
+        # TODO sparse data
+        quantiles = nanpercentile(X, (self.q_lower, self.q_upper))
+        iqr = quantiles[1] - quantiles[0]
+
+        self.q_lower_ = quantiles[0]
+        self.q_upper_ = quantiles[1]
+        self.iqr_ = _handle_zeros_in_scale(iqr, copy=False)
+        self.max_ = np.nanmax(X)
+        self.min_ = np.nanmin(X)
+        return self
+
+    def transform(self, X):
+        """
+        Parameters
+        ----------
+        X : array-like
+            The data to rescale onto the fitted training range.
+        """
+        # attribute names must be passed together, as a single list
+        check_is_fitted(self, ['iqr_', 'max_'])
+        X = check_array(X, copy=True, estimator=self, dtype=FLOAT_DTYPES,
+                        force_all_finite=True)
+
+        # TODO sparse data
+        train_upper_scale = (self.max_ - self.q_upper_) / self.iqr_
+        train_lower_scale = (self.q_lower_ - self.min_) / self.iqr_
+
+        test_quantiles = nanpercentile(X, (self.q_lower, self.q_upper))
+        test_iqr = _handle_zeros_in_scale(
+            test_quantiles[1] - test_quantiles[0], copy=False)
+
+        test_upper_bound = test_quantiles[1] + train_upper_scale * test_iqr
+        test_lower_bound = test_quantiles[0] - train_lower_scale * test_iqr
+        test_lower_bound = max(test_lower_bound, np.nanmin(X))
+
+        X[X > test_upper_bound] = test_upper_bound
+        X[X < test_lower_bound] = test_lower_bound
+
+        X = (X - test_lower_bound) / (test_upper_bound - test_lower_bound)\
+            * (self.max_ - self.min_) + self.min_
+        return X
+
+    def inverse_transform(self, X):
+        """Scale the data back to the original state"""
+        raise NotImplementedError("Inverse transformation is not implemented!")
diff -r 39ae276e75d9 -r e94395c672bd search_model_validation.py
--- a/search_model_validation.py Sun Dec 30 01:56:11 2018 -0500
+++ b/search_model_validation.py Tue May 14 18:15:12 2019 -0400
@@ -1,7 +1,8 @@
+import argparse
+import collections
import imblearn
import json
import numpy as np
-import os
import pandas
import pickle
import skrebate
@@ -9,93 +10,124 @@
import sys
import xgboost
import warnings
+import iraps_classifier
+import model_validations
+import preprocessors
+import feature_selectors
from imblearn import under_sampling, over_sampling, combine
-from imblearn.pipeline import Pipeline as imbPipeline
-from sklearn import (cluster, compose, decomposition, ensemble, feature_extraction,
- feature_selection, gaussian_process, kernel_approximation, metrics,
- model_selection, naive_bayes, neighbors, pipeline, preprocessing,
- svm, linear_model, tree, discriminant_analysis)
+from scipy.io import mmread
+from mlxtend import classifier, regressor
+from sklearn import (cluster, compose, decomposition, ensemble,
+ feature_extraction, feature_selection,
+ gaussian_process, kernel_approximation, metrics,
+ model_selection, naive_bayes, neighbors,
+ pipeline, preprocessing, svm, linear_model,
+ tree, discriminant_analysis)
from sklearn.exceptions import FitFailedWarning
from sklearn.externals import joblib
-from utils import get_cv, get_scoring, get_X_y, load_model, read_columns, SafeEval
+from sklearn.model_selection._validation import _score
+
+from utils import (SafeEval, get_cv, get_scoring, get_X_y,
+ load_model, read_columns)
+from model_validations import train_test_split
-N_JOBS = int(os.environ.get('GALAXY_SLOTS', 1))
+N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
+CACHE_DIR = './cached'
+NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', 'steps',
+ 'nthread', 'verbose')
-def get_search_params(params_builder):
+def _eval_search_params(params_builder):
search_params = {}
- safe_eval = SafeEval(load_scipy=True, load_numpy=True)
- safe_eval_es = SafeEval(load_estimators=True)
for p in params_builder['param_set']:
- search_p = p['search_param_selector']['search_p']
- if search_p.strip() == '':
+ search_list = p['sp_list'].strip()
+ if search_list == '':
continue
- param_type = p['search_param_selector']['selected_param_type']
+
+ param_name = p['sp_name']
+ if param_name.lower().endswith(NON_SEARCHABLE):
+ print("Warning: `%s` is not eligible for search and was "
+ "omitted!" % param_name)
+ continue
- lst = search_p.split(':')
- assert (len(lst) == 2), "Error, make sure there is one and only one colon in search parameter input."
- literal = lst[1].strip()
- param_name = lst[0].strip()
- if param_name:
- if param_name.lower() == 'n_jobs':
- sys.exit("Parameter `%s` is invalid for search." %param_name)
- elif not param_name.endswith('-'):
- ev = safe_eval(literal)
- if param_type == 'final_estimator_p':
- search_params['estimator__' + param_name] = ev
- else:
- search_params['preprocessing_' + param_type[5:6] + '__' + param_name] = ev
- else:
- # only for estimator eval, add `-` to the end of param
- #TODO maybe add regular express check
- ev = safe_eval_es(literal)
- for obj in ev:
- if 'n_jobs' in obj.get_params():
- obj.set_params( n_jobs=N_JOBS )
- if param_type == 'final_estimator_p':
- search_params['estimator__' + param_name[:-1]] = ev
- else:
- search_params['preprocessing_' + param_type[5:6] + '__' + param_name[:-1]] = ev
- elif param_type != 'final_estimator_p':
- #TODO regular express check ?
- ev = safe_eval_es(literal)
- preprocessors = [preprocessing.StandardScaler(), preprocessing.Binarizer(), preprocessing.Imputer(),
- preprocessing.MaxAbsScaler(), preprocessing.Normalizer(), preprocessing.MinMaxScaler(),
- preprocessing.PolynomialFeatures(),preprocessing.RobustScaler(),
- feature_selection.SelectKBest(), feature_selection.GenericUnivariateSelect(),
- feature_selection.SelectPercentile(), feature_selection.SelectFpr(), feature_selection.SelectFdr(),
- feature_selection.SelectFwe(), feature_selection.VarianceThreshold(),
- decomposition.FactorAnalysis(random_state=0), decomposition.FastICA(random_state=0), decomposition.IncrementalPCA(),
- decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS), decomposition.LatentDirichletAllocation(random_state=0, n_jobs=N_JOBS),
- decomposition.MiniBatchDictionaryLearning(random_state=0, n_jobs=N_JOBS),
- decomposition.MiniBatchSparsePCA(random_state=0, n_jobs=N_JOBS), decomposition.NMF(random_state=0),
- decomposition.PCA(random_state=0), decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
- decomposition.TruncatedSVD(random_state=0),
- kernel_approximation.Nystroem(random_state=0), kernel_approximation.RBFSampler(random_state=0),
- kernel_approximation.AdditiveChi2Sampler(), kernel_approximation.SkewedChi2Sampler(random_state=0),
- cluster.FeatureAgglomeration(),
- skrebate.ReliefF(n_jobs=N_JOBS), skrebate.SURF(n_jobs=N_JOBS), skrebate.SURFstar(n_jobs=N_JOBS),
- skrebate.MultiSURF(n_jobs=N_JOBS), skrebate.MultiSURFstar(n_jobs=N_JOBS),
- imblearn.under_sampling.ClusterCentroids(random_state=0, n_jobs=N_JOBS),
- imblearn.under_sampling.CondensedNearestNeighbour(random_state=0, n_jobs=N_JOBS),
- imblearn.under_sampling.EditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
- imblearn.under_sampling.RepeatedEditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
- imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
- imblearn.under_sampling.InstanceHardnessThreshold(random_state=0, n_jobs=N_JOBS),
- imblearn.under_sampling.NearMiss(random_state=0, n_jobs=N_JOBS),
- imblearn.under_sampling.NeighbourhoodCleaningRule(random_state=0, n_jobs=N_JOBS),
- imblearn.under_sampling.OneSidedSelection(random_state=0, n_jobs=N_JOBS),
- imblearn.under_sampling.RandomUnderSampler(random_state=0),
- imblearn.under_sampling.TomekLinks(random_state=0, n_jobs=N_JOBS),
- imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
- imblearn.over_sampling.RandomOverSampler(random_state=0),
- imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
- imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
- imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS),
- imblearn.over_sampling.SMOTENC(categorical_features=[], random_state=0, n_jobs=N_JOBS),
- imblearn.combine.SMOTEENN(random_state=0), imblearn.combine.SMOTETomek(random_state=0)]
+ if not search_list.startswith(':'):
+ safe_eval = SafeEval(load_scipy=True, load_numpy=True)
+ ev = safe_eval(search_list)
+ search_params[param_name] = ev
+ else:
+            # Have `:` before search list, asks for estimator evaluation
+ safe_eval_es = SafeEval(load_estimators=True)
+ search_list = search_list[1:].strip()
+            # TODO maybe add regular expression check
+ ev = safe_eval_es(search_list)
+ preprocessors = (
+ preprocessing.StandardScaler(), preprocessing.Binarizer(),
+ preprocessing.Imputer(), preprocessing.MaxAbsScaler(),
+ preprocessing.Normalizer(), preprocessing.MinMaxScaler(),
+ preprocessing.PolynomialFeatures(),
+ preprocessing.RobustScaler(), feature_selection.SelectKBest(),
+ feature_selection.GenericUnivariateSelect(),
+ feature_selection.SelectPercentile(),
+ feature_selection.SelectFpr(), feature_selection.SelectFdr(),
+ feature_selection.SelectFwe(),
+ feature_selection.VarianceThreshold(),
+ decomposition.FactorAnalysis(random_state=0),
+ decomposition.FastICA(random_state=0),
+ decomposition.IncrementalPCA(),
+ decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
+ decomposition.LatentDirichletAllocation(
+ random_state=0, n_jobs=N_JOBS),
+ decomposition.MiniBatchDictionaryLearning(
+ random_state=0, n_jobs=N_JOBS),
+ decomposition.MiniBatchSparsePCA(
+ random_state=0, n_jobs=N_JOBS),
+ decomposition.NMF(random_state=0),
+ decomposition.PCA(random_state=0),
+ decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
+ decomposition.TruncatedSVD(random_state=0),
+ kernel_approximation.Nystroem(random_state=0),
+ kernel_approximation.RBFSampler(random_state=0),
+ kernel_approximation.AdditiveChi2Sampler(),
+ kernel_approximation.SkewedChi2Sampler(random_state=0),
+ cluster.FeatureAgglomeration(),
+ skrebate.ReliefF(n_jobs=N_JOBS),
+ skrebate.SURF(n_jobs=N_JOBS),
+ skrebate.SURFstar(n_jobs=N_JOBS),
+ skrebate.MultiSURF(n_jobs=N_JOBS),
+ skrebate.MultiSURFstar(n_jobs=N_JOBS),
+ imblearn.under_sampling.ClusterCentroids(
+ random_state=0, n_jobs=N_JOBS),
+ imblearn.under_sampling.CondensedNearestNeighbour(
+ random_state=0, n_jobs=N_JOBS),
+ imblearn.under_sampling.EditedNearestNeighbours(
+ random_state=0, n_jobs=N_JOBS),
+ imblearn.under_sampling.RepeatedEditedNearestNeighbours(
+ random_state=0, n_jobs=N_JOBS),
+ imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
+ imblearn.under_sampling.InstanceHardnessThreshold(
+ random_state=0, n_jobs=N_JOBS),
+ imblearn.under_sampling.NearMiss(
+ random_state=0, n_jobs=N_JOBS),
+ imblearn.under_sampling.NeighbourhoodCleaningRule(
+ random_state=0, n_jobs=N_JOBS),
+ imblearn.under_sampling.OneSidedSelection(
+ random_state=0, n_jobs=N_JOBS),
+ imblearn.under_sampling.RandomUnderSampler(
+ random_state=0),
+ imblearn.under_sampling.TomekLinks(
+ random_state=0, n_jobs=N_JOBS),
+ imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
+ imblearn.over_sampling.RandomOverSampler(random_state=0),
+ imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
+ imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
+ imblearn.over_sampling.BorderlineSMOTE(
+ random_state=0, n_jobs=N_JOBS),
+ imblearn.over_sampling.SMOTENC(
+ categorical_features=[], random_state=0, n_jobs=N_JOBS),
+ imblearn.combine.SMOTEENN(random_state=0),
+ imblearn.combine.SMOTETomek(random_state=0))
newlist = []
for obj in ev:
if obj is None:
@@ -114,87 +146,102 @@
newlist.extend(preprocessors[31:36])
elif obj == 'imb_all':
newlist.extend(preprocessors[36:55])
- elif type(obj) is int and -1 < obj < len(preprocessors):
+ elif type(obj) is int and -1 < obj < len(preprocessors):
newlist.append(preprocessors[obj])
- elif hasattr(obj, 'get_params'): # user object
+ elif hasattr(obj, 'get_params'): # user uploaded object
if 'n_jobs' in obj.get_params():
- newlist.append( obj.set_params(n_jobs=N_JOBS) )
+ newlist.append(obj.set_params(n_jobs=N_JOBS))
else:
newlist.append(obj)
else:
- sys.exit("Unsupported preprocessor type: %r" %(obj))
- search_params['preprocessing_' + param_type[5:6]] = newlist
- else:
- sys.exit("Parameter name of the final estimator can't be skipped!")
+ sys.exit("Unsupported estimator type: %r" % (obj))
+
+ search_params[param_name] = newlist
return search_params
-if __name__ == '__main__':
+def main(inputs, infile_estimator, infile1, infile2,
+ outfile_result, outfile_object=None, groups=None):
+ """
+ Parameter
+ ---------
+ inputs : str
+ File path to galaxy tool parameter
+
+ infile_estimator : str
+ File path to estimator
+
+ infile1 : str
+ File path to dataset containing features
+
+ infile2 : str
+ File path to dataset containing target values
+
+ outfile_result : str
+ File path to save the results, either cv_results or test result
+
+ outfile_object : str, optional
+ File path to save searchCV object
+
+ groups : str
+ File path to dataset containing groups labels
+ """
warnings.simplefilter('ignore')
- input_json_path = sys.argv[1]
- with open(input_json_path, 'r') as param_handler:
+ with open(inputs, 'r') as param_handler:
params = json.load(param_handler)
-
- infile_pipeline = sys.argv[2]
- infile1 = sys.argv[3]
- infile2 = sys.argv[4]
- outfile_result = sys.argv[5]
- if len(sys.argv) > 6:
- outfile_estimator = sys.argv[6]
- else:
- outfile_estimator = None
+ if groups:
+ (params['search_schemes']['options']['cv_selector']
+ ['groups_selector']['infile_g']) = groups
params_builder = params['search_schemes']['search_params_builder']
input_type = params['input_options']['selected_input']
if input_type == 'tabular':
header = 'infer' if params['input_options']['header1'] else None
- column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
- if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
+ column_option = (params['input_options']['column_selector_options_1']
+ ['selected_column_selector_option'])
+ if column_option in ['by_index_number', 'all_but_by_index_number',
+ 'by_header_name', 'all_but_by_header_name']:
c = params['input_options']['column_selector_options_1']['col1']
else:
c = None
X = read_columns(
infile1,
- c = c,
- c_option = column_option,
+ c=c,
+ c_option=column_option,
sep='\t',
header=header,
- parse_dates=True
- )
+ parse_dates=True).astype(float)
else:
X = mmread(open(infile1, 'r'))
header = 'infer' if params['input_options']['header2'] else None
- column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2']
- if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
+ column_option = (params['input_options']['column_selector_options_2']
+ ['selected_column_selector_option2'])
+ if column_option in ['by_index_number', 'all_but_by_index_number',
+ 'by_header_name', 'all_but_by_header_name']:
c = params['input_options']['column_selector_options_2']['col2']
else:
c = None
y = read_columns(
infile2,
- c = c,
- c_option = column_option,
+ c=c,
+ c_option=column_option,
sep='\t',
header=header,
- parse_dates=True
- )
+ parse_dates=True)
y = y.ravel()
optimizer = params['search_schemes']['selected_search_scheme']
optimizer = getattr(model_selection, optimizer)
options = params['search_schemes']['options']
+
splitter, groups = get_cv(options.pop('cv_selector'))
- if groups is None:
- options['cv'] = splitter
- elif groups == '':
- options['cv'] = list( splitter.split(X, y, groups=None) )
- else:
- options['cv'] = list( splitter.split(X, y, groups=groups) )
+ options['cv'] = splitter
options['n_jobs'] = N_JOBS
primary_scoring = options['scoring']['primary_scoring']
options['scoring'] = get_scoring(options['scoring'])
@@ -203,32 +250,117 @@
else:
options['error_score'] = np.NaN
if options['refit'] and isinstance(options['scoring'], dict):
- options['refit'] = 'primary'
+ options['refit'] = primary_scoring
if 'pre_dispatch' in options and options['pre_dispatch'] == '':
options['pre_dispatch'] = None
- with open(infile_pipeline, 'rb') as pipeline_handler:
- pipeline = load_model(pipeline_handler)
+ with open(infile_estimator, 'rb') as estimator_handler:
+ estimator = load_model(estimator_handler)
+
+ memory = joblib.Memory(location=CACHE_DIR, verbose=0)
+ # caching iraps_core fits can increase search speed significantly
+ if estimator.__class__.__name__ == 'IRAPSClassifier':
+ estimator.set_params(memory=memory)
+ else:
+ for p, v in estimator.get_params().items():  # reset cache / n_jobs params
+ if p.endswith('memory'):
+ if len(p) > 8 and p[:-8].endswith('irapsclassifier'):
+ # caching iraps_core fits can increase search speed
+ # significantly; p[:-8] drops the last 8 chars ('__memory')
+ new_params = {p: memory}
+ estimator.set_params(**new_params)
+ elif v:
+ new_params = {p: None}  # must be a dict: `**` cannot unpack a set
+ estimator.set_params(**new_params)
+ elif p.endswith('n_jobs'):
+ new_params = {p: 1}
+ estimator.set_params(**new_params)
+
+ param_grid = _eval_search_params(params_builder)
+ searcher = optimizer(estimator, param_grid, **options)
- search_params = get_search_params(params_builder)
- searcher = optimizer(pipeline, search_params, **options)
+ # do train_test_split
+ do_train_test_split = params['train_test_split'].pop('do_split')
+ if do_train_test_split == 'yes':
+ # make sure refit is chosen
+ if not options['refit']:
+ raise ValueError("Refit must be `True` for shuffle splitting!")
+ split_options = params['train_test_split']
+
+ # splits
+ if split_options['shuffle'] == 'stratified':
+ split_options['labels'] = y
+ X, X_test, y, y_test = train_test_split(X, y, **split_options)
+ elif split_options['shuffle'] == 'group':
+ if not groups:
+ raise ValueError("No group based CV option was "
+ "choosen for group shuffle!")
+ split_options['labels'] = groups
+ X, X_test, y, y_test, groups, _ =\
+ train_test_split(X, y, **split_options)
+ else:
+ if split_options['shuffle'] == 'None':
+ split_options['shuffle'] = None
+ X, X_test, y, y_test =\
+ train_test_split(X, y, **split_options)
+ # end train_test_split
if options['error_score'] == 'raise':
- searcher.fit(X, y)
+ searcher.fit(X, y, groups=groups)
else:
warnings.simplefilter('always', FitFailedWarning)
with warnings.catch_warnings(record=True) as w:
try:
- searcher.fit(X, y)
+ searcher.fit(X, y, groups=groups)
except ValueError:
pass
for warning in w:
print(repr(warning.message))
- cv_result = pandas.DataFrame(searcher.cv_results_)
- cv_result.rename(inplace=True, columns={'mean_test_primary': 'mean_test_'+primary_scoring, 'rank_test_primary': 'rank_test_'+primary_scoring})
- cv_result.to_csv(path_or_buf=outfile_result, sep='\t', header=True, index=False)
+ if do_train_test_split == 'no':
+ # save results
+ cv_results = pandas.DataFrame(searcher.cv_results_)
+ cv_results = cv_results[sorted(cv_results.columns)]
+ cv_results.to_csv(path_or_buf=outfile_result, sep='\t',
+ header=True, index=False)
+
+ # output test result using best_estimator_
+ else:
+ best_estimator_ = searcher.best_estimator_
+ if isinstance(options['scoring'], collections.Mapping):
+ is_multimetric = True
+ else:
+ is_multimetric = False
- if outfile_estimator:
- with open(outfile_estimator, 'wb') as output_handler:
- pickle.dump(searcher.best_estimator_, output_handler, pickle.HIGHEST_PROTOCOL)
+ test_score = _score(best_estimator_, X_test,
+ y_test, options['scoring'],
+ is_multimetric=is_multimetric)
+ if not is_multimetric:
+ test_score = {primary_scoring: test_score}
+ for key, value in test_score.items():
+ test_score[key] = [value]
+ result_df = pandas.DataFrame(test_score)
+ result_df.to_csv(path_or_buf=outfile_result, sep='\t',
+ header=True, index=False)
+
+ memory.clear(warn=False)
+
+ if outfile_object:
+ with open(outfile_object, 'wb') as output_handler:
+ pickle.dump(searcher, output_handler, pickle.HIGHEST_PROTOCOL)
+
+
+if __name__ == '__main__':  # command-line entry point
+ aparser = argparse.ArgumentParser()
+ aparser.add_argument("-i", "--inputs", dest="inputs", required=True)  # the only required option
+ aparser.add_argument("-e", "--estimator", dest="infile_estimator")
+ aparser.add_argument("-X", "--infile1", dest="infile1")
+ aparser.add_argument("-y", "--infile2", dest="infile2")
+ aparser.add_argument("-r", "--outfile_result", dest="outfile_result")
+ aparser.add_argument("-o", "--outfile_object", dest="outfile_object")
+ aparser.add_argument("-g", "--groups", dest="groups")
+ args = aparser.parse_args()
+
+ main(args.inputs, args.infile_estimator, args.infile1, args.infile2,  # parsed options go straight to main()
+ args.outfile_result, outfile_object=args.outfile_object,
+ groups=args.groups)
diff -r 39ae276e75d9 -r e94395c672bd sk_whitelist.json
--- a/sk_whitelist.json Sun Dec 30 01:56:11 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,761 +0,0 @@
-{ "SK_NAMES": [
- "sklearn._ASSUME_FINITE", "sklearn._isotonic._inplace_contiguous_isotonic_regression",
- "sklearn._isotonic._make_unique", "sklearn.base.BaseEstimator",
- "sklearn.base.BiclusterMixin", "sklearn.base.ClassifierMixin",
- "sklearn.base.ClusterMixin", "sklearn.base.DensityMixin",
- "sklearn.base.MetaEstimatorMixin", "sklearn.base.RegressorMixin",
- "sklearn.base.TransformerMixin", "sklearn.base._first_and_last_element",
- "sklearn.base._pprint", "sklearn.base.clone",
- "sklearn.base.is_classifier", "sklearn.base.is_regressor",
- "sklearn.clone", "sklearn.cluster.AffinityPropagation",
- "sklearn.cluster.AgglomerativeClustering", "sklearn.cluster.Birch",
- "sklearn.cluster.DBSCAN", "sklearn.cluster.FeatureAgglomeration",
- "sklearn.cluster.KMeans", "sklearn.cluster.MeanShift",
- "sklearn.cluster.MiniBatchKMeans", "sklearn.cluster.SpectralBiclustering",
- "sklearn.cluster.SpectralClustering", "sklearn.cluster.SpectralCoclustering",
- "sklearn.cluster._dbscan_inner.dbscan_inner", "sklearn.cluster._feature_agglomeration.AgglomerationTransform",
- "sklearn.cluster._hierarchical.WeightedEdge", "sklearn.cluster._hierarchical._get_parents",
- "sklearn.cluster._hierarchical._hc_get_descendent", "sklearn.cluster._hierarchical.average_merge",
- "sklearn.cluster._hierarchical.compute_ward_dist", "sklearn.cluster._hierarchical.hc_get_heads",
- "sklearn.cluster._hierarchical.max_merge", "sklearn.cluster._k_means._assign_labels_array",
- "sklearn.cluster._k_means._assign_labels_csr", "sklearn.cluster._k_means._centers_dense",
- "sklearn.cluster._k_means._centers_sparse", "sklearn.cluster._k_means._mini_batch_update_csr",
- "sklearn.cluster._k_means_elkan.k_means_elkan", "sklearn.cluster.affinity_propagation",
- "sklearn.cluster.affinity_propagation_.AffinityPropagation", "sklearn.cluster.affinity_propagation_.affinity_propagation",
- "sklearn.cluster.bicluster.BaseSpectral", "sklearn.cluster.bicluster.SpectralBiclustering",
- "sklearn.cluster.bicluster.SpectralCoclustering", "sklearn.cluster.bicluster._bistochastic_normalize",
- "sklearn.cluster.bicluster._log_normalize", "sklearn.cluster.bicluster._scale_normalize",
- "sklearn.cluster.birch.Birch", "sklearn.cluster.birch._CFNode",
- "sklearn.cluster.birch._CFSubcluster", "sklearn.cluster.birch._iterate_sparse_X",
- "sklearn.cluster.birch._split_node", "sklearn.cluster.dbscan",
- "sklearn.cluster.dbscan_.DBSCAN", "sklearn.cluster.dbscan_.dbscan",
- "sklearn.cluster.estimate_bandwidth", "sklearn.cluster.get_bin_seeds",
- "sklearn.cluster.hierarchical.AgglomerativeClustering", "sklearn.cluster.hierarchical.FeatureAgglomeration",
- "sklearn.cluster.hierarchical._TREE_BUILDERS", "sklearn.cluster.hierarchical._average_linkage",
- "sklearn.cluster.hierarchical._complete_linkage", "sklearn.cluster.hierarchical._fix_connectivity",
- "sklearn.cluster.hierarchical._hc_cut", "sklearn.cluster.hierarchical.linkage_tree",
- "sklearn.cluster.hierarchical.ward_tree", "sklearn.cluster.k_means",
- "sklearn.cluster.k_means_.FLOAT_DTYPES", "sklearn.cluster.k_means_.KMeans",
- "sklearn.cluster.k_means_.MiniBatchKMeans", "sklearn.cluster.k_means_._init_centroids",
- "sklearn.cluster.k_means_._k_init", "sklearn.cluster.k_means_._kmeans_single_elkan",
- "sklearn.cluster.k_means_._kmeans_single_lloyd", "sklearn.cluster.k_means_._labels_inertia",
- "sklearn.cluster.k_means_._labels_inertia_precompute_dense", "sklearn.cluster.k_means_._mini_batch_convergence",
- "sklearn.cluster.k_means_._mini_batch_step", "sklearn.cluster.k_means_._tolerance",
- "sklearn.cluster.k_means_._validate_center_shape", "sklearn.cluster.k_means_.k_means",
- "sklearn.cluster.k_means_.string_types", "sklearn.cluster.linkage_tree",
- "sklearn.cluster.mean_shift", "sklearn.cluster.mean_shift_.MeanShift",
- "sklearn.cluster.mean_shift_._mean_shift_single_seed", "sklearn.cluster.mean_shift_.estimate_bandwidth",
- "sklearn.cluster.mean_shift_.get_bin_seeds", "sklearn.cluster.mean_shift_.mean_shift",
- "sklearn.cluster.spectral.SpectralClustering", "sklearn.cluster.spectral.discretize",
- "sklearn.cluster.spectral.spectral_clustering", "sklearn.cluster.spectral_clustering",
- "sklearn.cluster.ward_tree", "sklearn.config_context", "sklearn.compose.TransformedTargetRegressor",
- "sklearn.compose._target.TransformedTargetRegressor", "sklearn.compose.ColumnTransformer",
- "sklearn.compose._column_transformer.ColumnTransformer", "sklearn.compose.make_column_transformer",
- "sklearn.compose._column_transformer.make_column_transformer",
- "sklearn.covariance.EllipticEnvelope", "sklearn.covariance.EmpiricalCovariance",
- "sklearn.covariance.GraphLasso", "sklearn.covariance.GraphLassoCV",
- "sklearn.covariance.LedoitWolf", "sklearn.covariance.MinCovDet",
- "sklearn.covariance.OAS", "sklearn.covariance.ShrunkCovariance",
- "sklearn.covariance.empirical_covariance", "sklearn.covariance.empirical_covariance_.EmpiricalCovariance",
- "sklearn.covariance.empirical_covariance_.empirical_covariance", "sklearn.covariance.empirical_covariance_.log_likelihood",
- "sklearn.covariance.fast_mcd", "sklearn.covariance.graph_lasso",
- "sklearn.covariance.graph_lasso_.GraphLasso", "sklearn.covariance.graph_lasso_.GraphLassoCV",
- "sklearn.covariance.graph_lasso_._dual_gap", "sklearn.covariance.graph_lasso_._objective",
- "sklearn.covariance.graph_lasso_.alpha_max", "sklearn.covariance.graph_lasso_.graph_lasso",
- "sklearn.covariance.graph_lasso_.graph_lasso_path", "sklearn.covariance.ledoit_wolf",
- "sklearn.covariance.ledoit_wolf_shrinkage", "sklearn.covariance.log_likelihood",
- "sklearn.covariance.oas", "sklearn.covariance.outlier_detection.EllipticEnvelope",
- "sklearn.covariance.robust_covariance.MinCovDet", "sklearn.covariance.robust_covariance._c_step",
- "sklearn.covariance.robust_covariance.c_step", "sklearn.covariance.robust_covariance.fast_mcd",
- "sklearn.covariance.robust_covariance.select_candidates", "sklearn.covariance.shrunk_covariance",
- "sklearn.covariance.shrunk_covariance_.LedoitWolf", "sklearn.covariance.shrunk_covariance_.OAS",
- "sklearn.covariance.shrunk_covariance_.ShrunkCovariance", "sklearn.covariance.shrunk_covariance_.ledoit_wolf",
- "sklearn.covariance.shrunk_covariance_.ledoit_wolf_shrinkage", "sklearn.covariance.shrunk_covariance_.oas",
- "sklearn.covariance.shrunk_covariance_.shrunk_covariance", "sklearn.decomposition.DictionaryLearning",
- "sklearn.decomposition.FactorAnalysis", "sklearn.decomposition.FastICA",
- "sklearn.decomposition.IncrementalPCA", "sklearn.decomposition.KernelPCA",
- "sklearn.decomposition.LatentDirichletAllocation", "sklearn.decomposition.MiniBatchDictionaryLearning",
- "sklearn.decomposition.MiniBatchSparsePCA", "sklearn.decomposition.NMF",
- "sklearn.decomposition.PCA", "sklearn.decomposition.RandomizedPCA",
- "sklearn.decomposition.SparseCoder", "sklearn.decomposition.SparsePCA",
- "sklearn.decomposition.TruncatedSVD", "sklearn.decomposition._online_lda._dirichlet_expectation_1d",
- "sklearn.decomposition._online_lda._dirichlet_expectation_2d", "sklearn.decomposition._online_lda.mean_change",
- "sklearn.decomposition.base._BasePCA", "sklearn.decomposition.cdnmf_fast._update_cdnmf_fast",
- "sklearn.decomposition.dict_learning", "sklearn.decomposition.dict_learning_online",
- "sklearn.decomposition.factor_analysis.FactorAnalysis", "sklearn.decomposition.fastica",
- "sklearn.decomposition.fastica_.FLOAT_DTYPES", "sklearn.decomposition.fastica_.FastICA",
- "sklearn.decomposition.fastica_._cube", "sklearn.decomposition.fastica_._exp",
- "sklearn.decomposition.fastica_._gs_decorrelation", "sklearn.decomposition.fastica_._ica_def",
- "sklearn.decomposition.fastica_._ica_par", "sklearn.decomposition.fastica_._logcosh",
- "sklearn.decomposition.fastica_._sym_decorrelation", "sklearn.decomposition.fastica_.fastica",
- "sklearn.decomposition.fastica_.string_types", "sklearn.decomposition.incremental_pca.IncrementalPCA",
- "sklearn.decomposition.kernel_pca.KernelPCA", "sklearn.decomposition.nmf.EPSILON",
- "sklearn.decomposition.nmf.INTEGER_TYPES", "sklearn.decomposition.nmf.NMF",
- "sklearn.decomposition.nmf._beta_divergence", "sklearn.decomposition.nmf._beta_loss_to_float",
- "sklearn.decomposition.nmf._check_init", "sklearn.decomposition.nmf._check_string_param",
- "sklearn.decomposition.nmf._compute_regularization", "sklearn.decomposition.nmf._fit_coordinate_descent",
- "sklearn.decomposition.nmf._fit_multiplicative_update", "sklearn.decomposition.nmf._initialize_nmf",
- "sklearn.decomposition.nmf._multiplicative_update_h", "sklearn.decomposition.nmf._multiplicative_update_w",
- "sklearn.decomposition.nmf._special_sparse_dot", "sklearn.decomposition.nmf._update_coordinate_descent",
- "sklearn.decomposition.nmf.non_negative_factorization", "sklearn.decomposition.nmf.norm",
- "sklearn.decomposition.nmf.trace_dot", "sklearn.decomposition.non_negative_factorization",
- "sklearn.decomposition.online_lda.EPS", "sklearn.decomposition.online_lda.LatentDirichletAllocation",
- "sklearn.decomposition.online_lda._update_doc_distribution", "sklearn.decomposition.online_lda.gammaln",
- "sklearn.decomposition.pca.PCA", "sklearn.decomposition.pca.RandomizedPCA",
- "sklearn.decomposition.pca._assess_dimension_", "sklearn.decomposition.pca._infer_dimension_",
- "sklearn.decomposition.pca.gammaln", "sklearn.decomposition.sparse_encode",
- "sklearn.decomposition.sparse_pca.MiniBatchSparsePCA", "sklearn.decomposition.sparse_pca.SparsePCA",
- "sklearn.decomposition.truncated_svd.TruncatedSVD", "sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
- "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis", "sklearn.discriminant_analysis._class_cov",
- "sklearn.discriminant_analysis._class_means", "sklearn.discriminant_analysis._cov",
- "sklearn.discriminant_analysis.string_types", "sklearn.ensemble.AdaBoostClassifier",
- "sklearn.ensemble.AdaBoostRegressor", "sklearn.ensemble.BaggingClassifier",
- "sklearn.ensemble.BaggingRegressor", "sklearn.ensemble.BaseEnsemble",
- "sklearn.ensemble.ExtraTreesClassifier", "sklearn.ensemble.ExtraTreesRegressor",
- "sklearn.ensemble.GradientBoostingClassifier", "sklearn.ensemble.GradientBoostingRegressor",
- "sklearn.ensemble.IsolationForest", "sklearn.ensemble.RandomForestClassifier",
- "sklearn.ensemble.RandomForestRegressor", "sklearn.ensemble.RandomTreesEmbedding",
- "sklearn.ensemble.VotingClassifier", "sklearn.ensemble._gradient_boosting._partial_dependence_tree",
- "sklearn.ensemble._gradient_boosting._predict_regression_tree_stages_sparse", "sklearn.ensemble._gradient_boosting._random_sample_mask",
- "sklearn.ensemble._gradient_boosting.predict_stage", "sklearn.ensemble._gradient_boosting.predict_stages",
- "sklearn.ensemble.bagging.BaggingClassifier", "sklearn.ensemble.bagging.BaggingRegressor",
- "sklearn.ensemble.bagging.BaseBagging", "sklearn.ensemble.bagging.MAX_INT",
- "sklearn.ensemble.bagging._generate_bagging_indices", "sklearn.ensemble.bagging._generate_indices",
- "sklearn.ensemble.bagging._parallel_build_estimators", "sklearn.ensemble.bagging._parallel_decision_function",
- "sklearn.ensemble.bagging._parallel_predict_log_proba", "sklearn.ensemble.bagging._parallel_predict_proba",
- "sklearn.ensemble.bagging._parallel_predict_regression", "sklearn.ensemble.base.BaseEnsemble",
- "sklearn.ensemble.base.MAX_RAND_SEED", "sklearn.ensemble.base._partition_estimators",
- "sklearn.ensemble.base._set_random_states", "sklearn.ensemble.forest.BaseForest",
- "sklearn.ensemble.forest.ExtraTreesClassifier", "sklearn.ensemble.forest.ExtraTreesRegressor",
- "sklearn.ensemble.forest.ForestClassifier", "sklearn.ensemble.forest.ForestRegressor",
- "sklearn.ensemble.forest.MAX_INT", "sklearn.ensemble.forest.RandomForestClassifier",
- "sklearn.ensemble.forest.RandomForestRegressor", "sklearn.ensemble.forest.RandomTreesEmbedding",
- "sklearn.ensemble.forest._generate_sample_indices", "sklearn.ensemble.forest._generate_unsampled_indices",
- "sklearn.ensemble.forest._parallel_build_trees", "sklearn.ensemble.forest.accumulate_prediction",
- "sklearn.ensemble.gradient_boosting.BaseGradientBoosting", "sklearn.ensemble.gradient_boosting.BinomialDeviance",
- "sklearn.ensemble.gradient_boosting.ClassificationLossFunction", "sklearn.ensemble.gradient_boosting.ExponentialLoss",
- "sklearn.ensemble.gradient_boosting.GradientBoostingClassifier", "sklearn.ensemble.gradient_boosting.GradientBoostingRegressor",
- "sklearn.ensemble.gradient_boosting.HuberLossFunction", "sklearn.ensemble.gradient_boosting.INIT_ESTIMATORS",
- "sklearn.ensemble.gradient_boosting.LOSS_FUNCTIONS", "sklearn.ensemble.gradient_boosting.LeastAbsoluteError",
- "sklearn.ensemble.gradient_boosting.LeastSquaresError", "sklearn.ensemble.gradient_boosting.LogOddsEstimator",
- "sklearn.ensemble.gradient_boosting.LossFunction", "sklearn.ensemble.gradient_boosting.MeanEstimator",
- "sklearn.ensemble.gradient_boosting.MultinomialDeviance", "sklearn.ensemble.gradient_boosting.PriorProbabilityEstimator",
- "sklearn.ensemble.gradient_boosting.QuantileEstimator", "sklearn.ensemble.gradient_boosting.QuantileLossFunction",
- "sklearn.ensemble.gradient_boosting.RegressionLossFunction", "sklearn.ensemble.gradient_boosting.ScaledLogOddsEstimator",
- "sklearn.ensemble.gradient_boosting.TREE_LEAF", "sklearn.ensemble.gradient_boosting.VerboseReporter",
- "sklearn.ensemble.gradient_boosting.ZeroEstimator", "sklearn.ensemble.gradient_boosting.expit",
- "sklearn.ensemble.iforest.INTEGER_TYPES", "sklearn.ensemble.iforest.IsolationForest",
- "sklearn.ensemble.iforest._average_path_length", "sklearn.ensemble.iforest.euler_gamma",
- "sklearn.ensemble.partial_dependence._grid_from_X", "sklearn.ensemble.partial_dependence.partial_dependence",
- "sklearn.ensemble.partial_dependence.plot_partial_dependence", "sklearn.ensemble.voting_classifier.VotingClassifier",
- "sklearn.ensemble.voting_classifier._parallel_fit_estimator", "sklearn.ensemble.weight_boosting.AdaBoostClassifier",
- "sklearn.ensemble.weight_boosting.AdaBoostRegressor", "sklearn.ensemble.weight_boosting.BaseWeightBoosting",
- "sklearn.ensemble.weight_boosting._samme_proba", "sklearn.ensemble.weight_boosting.inner1d",
- "sklearn.feature_extraction.DictVectorizer", "sklearn.feature_extraction.FeatureHasher",
- "sklearn.feature_extraction._hashing.transform", "sklearn.feature_extraction.dict_vectorizer.DictVectorizer",
- "sklearn.feature_extraction.dict_vectorizer._tosequence", "sklearn.feature_extraction.grid_to_graph",
- "sklearn.feature_extraction.hashing.FeatureHasher", "sklearn.feature_extraction.hashing._iteritems",
- "sklearn.feature_extraction.image.PatchExtractor", "sklearn.feature_extraction.image._compute_gradient_3d",
- "sklearn.feature_extraction.image._compute_n_patches", "sklearn.feature_extraction.image._make_edges_3d",
- "sklearn.feature_extraction.image._mask_edges_weights", "sklearn.feature_extraction.image._to_graph",
- "sklearn.feature_extraction.image.extract_patches", "sklearn.feature_extraction.image.extract_patches_2d",
- "sklearn.feature_extraction.image.grid_to_graph", "sklearn.feature_extraction.image.img_to_graph",
- "sklearn.feature_extraction.image.reconstruct_from_patches_2d", "sklearn.feature_extraction.img_to_graph",
- "sklearn.feature_extraction.stop_words.ENGLISH_STOP_WORDS", "sklearn.feature_extraction.text.CountVectorizer",
- "sklearn.feature_extraction.text.ENGLISH_STOP_WORDS", "sklearn.feature_extraction.text.HashingVectorizer",
- "sklearn.feature_extraction.text.TfidfTransformer", "sklearn.feature_extraction.text.TfidfVectorizer",
- "sklearn.feature_extraction.text.VectorizerMixin", "sklearn.feature_extraction.text._check_stop_list",
- "sklearn.feature_extraction.text._document_frequency", "sklearn.feature_extraction.text._make_int_array",
- "sklearn.feature_extraction.text.strip_accents_ascii", "sklearn.feature_extraction.text.strip_accents_unicode",
- "sklearn.feature_extraction.text.strip_tags", "sklearn.feature_selection.GenericUnivariateSelect",
- "sklearn.feature_selection.RFE", "sklearn.feature_selection.RFECV",
- "sklearn.feature_selection.SelectFdr", "sklearn.feature_selection.SelectFpr",
- "sklearn.feature_selection.SelectFromModel", "sklearn.feature_selection.SelectFwe",
- "sklearn.feature_selection.SelectKBest", "sklearn.feature_selection.SelectPercentile",
- "sklearn.feature_selection.VarianceThreshold", "sklearn.feature_selection.base.SelectorMixin",
- "sklearn.feature_selection.chi2", "sklearn.feature_selection.f_classif",
- "sklearn.feature_selection.f_oneway", "sklearn.feature_selection.f_regression",
- "sklearn.feature_selection.from_model.SelectFromModel", "sklearn.feature_selection.from_model._calculate_threshold",
- "sklearn.feature_selection.from_model._get_feature_importances", "sklearn.feature_selection.mutual_info_._compute_mi",
- "sklearn.feature_selection.mutual_info_._compute_mi_cc", "sklearn.feature_selection.mutual_info_._compute_mi_cd",
- "sklearn.feature_selection.mutual_info_._estimate_mi", "sklearn.feature_selection.mutual_info_._iterate_columns",
- "sklearn.feature_selection.mutual_info_.digamma", "sklearn.feature_selection.mutual_info_.mutual_info_classif",
- "sklearn.feature_selection.mutual_info_.mutual_info_regression", "sklearn.feature_selection.mutual_info_classif",
- "sklearn.feature_selection.mutual_info_regression", "sklearn.feature_selection.rfe.RFE",
- "sklearn.feature_selection.rfe.RFECV", "sklearn.feature_selection.rfe._rfe_single_fit",
- "sklearn.feature_selection.univariate_selection.GenericUnivariateSelect", "sklearn.feature_selection.univariate_selection.SelectFdr",
- "sklearn.feature_selection.univariate_selection.SelectFpr", "sklearn.feature_selection.univariate_selection.SelectFwe",
- "sklearn.feature_selection.univariate_selection.SelectKBest", "sklearn.feature_selection.univariate_selection.SelectPercentile",
- "sklearn.feature_selection.univariate_selection._BaseFilter", "sklearn.feature_selection.univariate_selection._chisquare",
- "sklearn.feature_selection.univariate_selection._clean_nans", "sklearn.feature_selection.univariate_selection.chi2",
- "sklearn.feature_selection.univariate_selection.f_classif", "sklearn.feature_selection.univariate_selection.f_oneway",
- "sklearn.feature_selection.univariate_selection.f_regression", "sklearn.feature_selection.variance_threshold.VarianceThreshold",
- "sklearn.gaussian_process.GaussianProcess", "sklearn.gaussian_process.GaussianProcessClassifier",
- "sklearn.gaussian_process.GaussianProcessRegressor", "sklearn.gaussian_process.correlation_models.absolute_exponential",
- "sklearn.gaussian_process.correlation_models.cubic", "sklearn.gaussian_process.correlation_models.generalized_exponential",
- "sklearn.gaussian_process.correlation_models.linear", "sklearn.gaussian_process.correlation_models.pure_nugget",
- "sklearn.gaussian_process.correlation_models.squared_exponential", "sklearn.gaussian_process.gaussian_process.GaussianProcess",
- "sklearn.gaussian_process.gaussian_process.MACHINE_EPSILON", "sklearn.gaussian_process.gaussian_process.l1_cross_distances",
- "sklearn.gaussian_process.gpc.COEFS", "sklearn.gaussian_process.gpc.GaussianProcessClassifier",
- "sklearn.gaussian_process.gpc.LAMBDAS", "sklearn.gaussian_process.gpc._BinaryGaussianProcessClassifierLaplace",
- "sklearn.gaussian_process.gpc.erf", "sklearn.gaussian_process.gpc.expit",
- "sklearn.gaussian_process.gpr.GaussianProcessRegressor", "sklearn.gaussian_process.kernels.CompoundKernel",
- "sklearn.gaussian_process.kernels.ConstantKernel", "sklearn.gaussian_process.kernels.DotProduct",
- "sklearn.gaussian_process.kernels.ExpSineSquared", "sklearn.gaussian_process.kernels.Exponentiation",
- "sklearn.gaussian_process.kernels.Hyperparameter", "sklearn.gaussian_process.kernels.Kernel",
- "sklearn.gaussian_process.kernels.KernelOperator", "sklearn.gaussian_process.kernels.Matern",
- "sklearn.gaussian_process.kernels.NormalizedKernelMixin", "sklearn.gaussian_process.kernels.PairwiseKernel",
- "sklearn.gaussian_process.kernels.Product", "sklearn.gaussian_process.kernels.RBF",
- "sklearn.gaussian_process.kernels.RationalQuadratic", "sklearn.gaussian_process.kernels.StationaryKernelMixin",
- "sklearn.gaussian_process.kernels.Sum", "sklearn.gaussian_process.kernels.WhiteKernel",
- "sklearn.gaussian_process.kernels._approx_fprime", "sklearn.gaussian_process.kernels._check_length_scale",
- "sklearn.gaussian_process.kernels.gamma", "sklearn.gaussian_process.kernels.kv",
- "sklearn.gaussian_process.regression_models.constant", "sklearn.gaussian_process.regression_models.linear",
- "sklearn.gaussian_process.regression_models.quadratic", "sklearn.get_config",
- "sklearn.isotonic.IsotonicRegression", "sklearn.isotonic.check_increasing",
- "sklearn.isotonic.isotonic_regression", "sklearn.kernel_approximation.AdditiveChi2Sampler",
- "sklearn.kernel_approximation.KERNEL_PARAMS", "sklearn.kernel_approximation.Nystroem",
- "sklearn.kernel_approximation.RBFSampler", "sklearn.kernel_approximation.SkewedChi2Sampler",
- "sklearn.kernel_ridge.KernelRidge", "sklearn.linear_model.ARDRegression",
- "sklearn.linear_model.BayesianRidge", "sklearn.linear_model.ElasticNet",
- "sklearn.linear_model.ElasticNetCV", "sklearn.linear_model.Hinge",
- "sklearn.linear_model.Huber", "sklearn.linear_model.HuberRegressor",
- "sklearn.linear_model.Lars", "sklearn.linear_model.LarsCV",
- "sklearn.linear_model.Lasso", "sklearn.linear_model.LassoCV",
- "sklearn.linear_model.LassoLars", "sklearn.linear_model.LassoLarsCV",
- "sklearn.linear_model.LassoLarsIC", "sklearn.linear_model.LinearRegression",
- "sklearn.linear_model.Log", "sklearn.linear_model.LogisticRegression",
- "sklearn.linear_model.LogisticRegressionCV", "sklearn.linear_model.ModifiedHuber",
- "sklearn.linear_model.MultiTaskElasticNet", "sklearn.linear_model.MultiTaskElasticNetCV",
- "sklearn.linear_model.MultiTaskLasso", "sklearn.linear_model.MultiTaskLassoCV",
- "sklearn.linear_model.OrthogonalMatchingPursuit", "sklearn.linear_model.OrthogonalMatchingPursuitCV",
- "sklearn.linear_model.PassiveAggressiveClassifier", "sklearn.linear_model.PassiveAggressiveRegressor",
- "sklearn.linear_model.Perceptron", "sklearn.linear_model.RANSACRegressor",
- "sklearn.linear_model.RandomizedLasso", "sklearn.linear_model.RandomizedLogisticRegression",
- "sklearn.linear_model.Ridge", "sklearn.linear_model.RidgeCV",
- "sklearn.linear_model.RidgeClassifier", "sklearn.linear_model.RidgeClassifierCV",
- "sklearn.linear_model.SGDClassifier", "sklearn.linear_model.SGDRegressor",
- "sklearn.linear_model.SquaredLoss", "sklearn.linear_model.TheilSenRegressor",
- "sklearn.linear_model.base.FLOAT_DTYPES", "sklearn.linear_model.base.LinearClassifierMixin",
- "sklearn.linear_model.base.LinearModel", "sklearn.linear_model.base.LinearRegression",
- "sklearn.linear_model.base.SPARSE_INTERCEPT_DECAY", "sklearn.linear_model.base.SparseCoefMixin",
- "sklearn.linear_model.base._pre_fit", "sklearn.linear_model.base._preprocess_data",
- "sklearn.linear_model.base._rescale_data", "sklearn.linear_model.base.center_data",
- "sklearn.linear_model.base.make_dataset", "sklearn.linear_model.base.sparse_center_data",
- "sklearn.linear_model.bayes.ARDRegression", "sklearn.linear_model.bayes.BayesianRidge",
- "sklearn.linear_model.cd_fast.enet_coordinate_descent", "sklearn.linear_model.cd_fast.enet_coordinate_descent_gram",
- "sklearn.linear_model.cd_fast.enet_coordinate_descent_multi_task", "sklearn.linear_model.cd_fast.sparse_enet_coordinate_descent",
- "sklearn.linear_model.coordinate_descent.ElasticNet", "sklearn.linear_model.coordinate_descent.ElasticNetCV",
- "sklearn.linear_model.coordinate_descent.Lasso", "sklearn.linear_model.coordinate_descent.LassoCV",
- "sklearn.linear_model.coordinate_descent.LinearModelCV", "sklearn.linear_model.coordinate_descent.MultiTaskElasticNet",
- "sklearn.linear_model.coordinate_descent.MultiTaskElasticNetCV", "sklearn.linear_model.coordinate_descent.MultiTaskLasso",
- "sklearn.linear_model.coordinate_descent.MultiTaskLassoCV", "sklearn.linear_model.coordinate_descent._alpha_grid",
- "sklearn.linear_model.coordinate_descent._path_residuals", "sklearn.linear_model.coordinate_descent.enet_path",
- "sklearn.linear_model.coordinate_descent.lasso_path", "sklearn.linear_model.enet_path",
- "sklearn.linear_model.huber.HuberRegressor", "sklearn.linear_model.huber._huber_loss_and_gradient",
- "sklearn.linear_model.lars_path", "sklearn.linear_model.lasso_path",
- "sklearn.linear_model.lasso_stability_path", "sklearn.linear_model.least_angle.Lars",
- "sklearn.linear_model.least_angle.LarsCV", "sklearn.linear_model.least_angle.LassoLars",
- "sklearn.linear_model.least_angle.LassoLarsCV", "sklearn.linear_model.least_angle.LassoLarsIC",
- "sklearn.linear_model.least_angle._check_copy_and_writeable", "sklearn.linear_model.least_angle._lars_path_residues",
- "sklearn.linear_model.least_angle.lars_path", "sklearn.linear_model.least_angle.solve_triangular_args",
- "sklearn.linear_model.least_angle.string_types", "sklearn.linear_model.logistic.LogisticRegression",
- "sklearn.linear_model.logistic.LogisticRegressionCV", "sklearn.linear_model.logistic.SCORERS",
- "sklearn.linear_model.logistic._check_solver_option", "sklearn.linear_model.logistic._intercept_dot",
- "sklearn.linear_model.logistic._log_reg_scoring_path", "sklearn.linear_model.logistic._logistic_grad_hess",
- "sklearn.linear_model.logistic._logistic_loss", "sklearn.linear_model.logistic._logistic_loss_and_grad",
- "sklearn.linear_model.logistic._multinomial_grad_hess", "sklearn.linear_model.logistic._multinomial_loss",
- "sklearn.linear_model.logistic._multinomial_loss_grad", "sklearn.linear_model.logistic.expit",
- "sklearn.linear_model.logistic.logistic_regression_path", "sklearn.linear_model.logistic_regression_path",
- "sklearn.linear_model.omp.OrthogonalMatchingPursuit", "sklearn.linear_model.omp.OrthogonalMatchingPursuitCV",
- "sklearn.linear_model.omp._cholesky_omp", "sklearn.linear_model.omp._gram_omp",
- "sklearn.linear_model.omp._omp_path_residues", "sklearn.linear_model.omp.orthogonal_mp",
- "sklearn.linear_model.omp.orthogonal_mp_gram", "sklearn.linear_model.omp.premature",
- "sklearn.linear_model.omp.solve_triangular_args", "sklearn.linear_model.orthogonal_mp",
- "sklearn.linear_model.orthogonal_mp_gram", "sklearn.linear_model.passive_aggressive.DEFAULT_EPSILON",
- "sklearn.linear_model.passive_aggressive.PassiveAggressiveClassifier", "sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor",
- "sklearn.linear_model.perceptron.Perceptron", "sklearn.linear_model.randomized_l1.BaseRandomizedLinearModel",
- "sklearn.linear_model.randomized_l1.RandomizedLasso", "sklearn.linear_model.randomized_l1.RandomizedLogisticRegression",
- "sklearn.linear_model.randomized_l1._lasso_stability_path", "sklearn.linear_model.randomized_l1._randomized_lasso",
- "sklearn.linear_model.randomized_l1._randomized_logistic", "sklearn.linear_model.randomized_l1._resample_model",
- "sklearn.linear_model.randomized_l1.lasso_stability_path", "sklearn.linear_model.ransac.RANSACRegressor",
- "sklearn.linear_model.ransac._EPSILON", "sklearn.linear_model.ransac._dynamic_max_trials",
- "sklearn.linear_model.ridge.Ridge", "sklearn.linear_model.ridge.RidgeCV",
- "sklearn.linear_model.ridge.RidgeClassifier", "sklearn.linear_model.ridge.RidgeClassifierCV",
- "sklearn.linear_model.ridge._BaseRidge", "sklearn.linear_model.ridge._BaseRidgeCV",
- "sklearn.linear_model.ridge._RidgeGCV", "sklearn.linear_model.ridge._solve_cholesky",
- "sklearn.linear_model.ridge._solve_cholesky_kernel", "sklearn.linear_model.ridge._solve_lsqr",
- "sklearn.linear_model.ridge._solve_sparse_cg", "sklearn.linear_model.ridge._solve_svd",
- "sklearn.linear_model.ridge.ridge_regression", "sklearn.linear_model.ridge_regression",
- "sklearn.linear_model.sag.get_auto_step_size", "sklearn.linear_model.sag.sag",
- "sklearn.linear_model.sag.sag_solver", "sklearn.linear_model.sag_fast.MultinomialLogLoss",
- "sklearn.linear_model.sag_fast._multinomial_grad_loss_all_samples", "sklearn.linear_model.sag_fast.sag",
- "sklearn.linear_model.sgd_fast.Classification", "sklearn.linear_model.sgd_fast.EpsilonInsensitive",
- "sklearn.linear_model.sgd_fast.Hinge", "sklearn.linear_model.sgd_fast.Huber",
- "sklearn.linear_model.sgd_fast.Log", "sklearn.linear_model.sgd_fast.LossFunction",
- "sklearn.linear_model.sgd_fast.ModifiedHuber", "sklearn.linear_model.sgd_fast.Regression",
- "sklearn.linear_model.sgd_fast.SquaredEpsilonInsensitive", "sklearn.linear_model.sgd_fast.SquaredHinge",
- "sklearn.linear_model.sgd_fast.SquaredLoss", "sklearn.linear_model.sgd_fast._plain_sgd",
- "sklearn.linear_model.sgd_fast.average_sgd", "sklearn.linear_model.sgd_fast.plain_sgd",
- "sklearn.linear_model.stochastic_gradient.BaseSGD", "sklearn.linear_model.stochastic_gradient.BaseSGDClassifier",
- "sklearn.linear_model.stochastic_gradient.BaseSGDRegressor", "sklearn.linear_model.stochastic_gradient.DEFAULT_EPSILON",
- "sklearn.linear_model.stochastic_gradient.LEARNING_RATE_TYPES", "sklearn.linear_model.stochastic_gradient.PENALTY_TYPES",
- "sklearn.linear_model.stochastic_gradient.SGDClassifier", "sklearn.linear_model.stochastic_gradient.SGDRegressor",
- "sklearn.linear_model.stochastic_gradient._prepare_fit_binary", "sklearn.linear_model.stochastic_gradient.fit_binary",
- "sklearn.linear_model.theil_sen.TheilSenRegressor", "sklearn.linear_model.theil_sen._EPSILON",
- "sklearn.linear_model.theil_sen._breakdown_point", "sklearn.linear_model.theil_sen._lstsq",
- "sklearn.linear_model.theil_sen._modified_weiszfeld_step", "sklearn.linear_model.theil_sen._spatial_median",
- "sklearn.linear_model.theil_sen.binom", "sklearn.manifold.Isomap",
- "sklearn.manifold.LocallyLinearEmbedding", "sklearn.manifold.MDS",
- "sklearn.manifold.SpectralEmbedding", "sklearn.manifold.TSNE",
- "sklearn.manifold._barnes_hut_tsne.gradient", "sklearn.manifold._utils._binary_search_perplexity",
- "sklearn.manifold.isomap.Isomap", "sklearn.manifold.locally_linear.FLOAT_DTYPES",
- "sklearn.manifold.locally_linear.LocallyLinearEmbedding", "sklearn.manifold.locally_linear.barycenter_kneighbors_graph",
- "sklearn.manifold.locally_linear.barycenter_weights", "sklearn.manifold.locally_linear.locally_linear_embedding",
- "sklearn.manifold.locally_linear.null_space", "sklearn.manifold.locally_linear_embedding",
- "sklearn.manifold.mds.MDS", "sklearn.manifold.mds._smacof_single",
- "sklearn.manifold.mds.smacof", "sklearn.manifold.smacof",
- "sklearn.manifold.spectral_embedding", "sklearn.manifold.spectral_embedding_.SpectralEmbedding",
- "sklearn.manifold.spectral_embedding_._graph_connected_component", "sklearn.manifold.spectral_embedding_._graph_is_connected",
- "sklearn.manifold.spectral_embedding_._set_diag", "sklearn.manifold.spectral_embedding_.spectral_embedding",
- "sklearn.manifold.t_sne.MACHINE_EPSILON", "sklearn.manifold.t_sne.TSNE",
- "sklearn.manifold.t_sne._gradient_descent", "sklearn.manifold.t_sne._joint_probabilities",
- "sklearn.manifold.t_sne._joint_probabilities_nn", "sklearn.manifold.t_sne._kl_divergence",
- "sklearn.manifold.t_sne._kl_divergence_bh", "sklearn.manifold.t_sne.string_types",
- "sklearn.manifold.t_sne.trustworthiness", "sklearn.metrics.SCORERS",
- "sklearn.metrics.accuracy_score", "sklearn.metrics.adjusted_mutual_info_score",
- "sklearn.metrics.adjusted_rand_score", "sklearn.metrics.auc",
- "sklearn.metrics.average_precision_score", "sklearn.metrics.base._average_binary_score",
- "sklearn.metrics.brier_score_loss", "sklearn.metrics.calinski_harabaz_score",
- "sklearn.metrics.classification._check_binary_probabilistic_predictions", "sklearn.metrics.classification._check_targets",
- "sklearn.metrics.classification._prf_divide", "sklearn.metrics.classification._weighted_sum",
- "sklearn.metrics.classification.accuracy_score", "sklearn.metrics.classification.brier_score_loss",
- "sklearn.metrics.classification.classification_report", "sklearn.metrics.classification.cohen_kappa_score",
- "sklearn.metrics.classification.confusion_matrix", "sklearn.metrics.classification.f1_score",
- "sklearn.metrics.classification.fbeta_score", "sklearn.metrics.classification.hamming_loss",
- "sklearn.metrics.classification.hinge_loss", "sklearn.metrics.classification.jaccard_similarity_score",
- "sklearn.metrics.classification.log_loss", "sklearn.metrics.classification.matthews_corrcoef",
- "sklearn.metrics.classification.precision_recall_fscore_support", "sklearn.metrics.classification.precision_score",
- "sklearn.metrics.classification.recall_score", "sklearn.metrics.classification.zero_one_loss",
- "sklearn.metrics.classification_report", "sklearn.metrics.cluster.adjusted_mutual_info_score",
- "sklearn.metrics.cluster.adjusted_rand_score", "sklearn.metrics.cluster.bicluster._check_rows_and_columns",
- "sklearn.metrics.cluster.bicluster._jaccard", "sklearn.metrics.cluster.bicluster._pairwise_similarity",
- "sklearn.metrics.cluster.bicluster.consensus_score", "sklearn.metrics.cluster.calinski_harabaz_score",
- "sklearn.metrics.cluster.completeness_score", "sklearn.metrics.cluster.consensus_score",
- "sklearn.metrics.cluster.contingency_matrix", "sklearn.metrics.cluster.entropy",
- "sklearn.metrics.cluster.expected_mutual_info_fast.expected_mutual_information", "sklearn.metrics.cluster.expected_mutual_info_fast.gammaln",
- "sklearn.metrics.cluster.expected_mutual_information", "sklearn.metrics.cluster.fowlkes_mallows_score",
- "sklearn.metrics.cluster.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.homogeneity_score",
- "sklearn.metrics.cluster.mutual_info_score", "sklearn.metrics.cluster.normalized_mutual_info_score",
- "sklearn.metrics.cluster.silhouette_samples", "sklearn.metrics.cluster.silhouette_score",
- "sklearn.metrics.cluster.supervised.adjusted_mutual_info_score", "sklearn.metrics.cluster.supervised.adjusted_rand_score",
- "sklearn.metrics.cluster.supervised.check_clusterings", "sklearn.metrics.cluster.supervised.comb2",
- "sklearn.metrics.cluster.supervised.completeness_score", "sklearn.metrics.cluster.supervised.contingency_matrix",
- "sklearn.metrics.cluster.supervised.entropy", "sklearn.metrics.cluster.supervised.fowlkes_mallows_score",
- "sklearn.metrics.cluster.supervised.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.supervised.homogeneity_score",
- "sklearn.metrics.cluster.supervised.mutual_info_score", "sklearn.metrics.cluster.supervised.normalized_mutual_info_score",
- "sklearn.metrics.cluster.supervised.v_measure_score", "sklearn.metrics.cluster.unsupervised.calinski_harabaz_score",
- "sklearn.metrics.cluster.unsupervised.check_number_of_labels", "sklearn.metrics.cluster.unsupervised.silhouette_samples",
- "sklearn.metrics.cluster.unsupervised.silhouette_score", "sklearn.metrics.cluster.v_measure_score",
- "sklearn.metrics.cohen_kappa_score", "sklearn.metrics.completeness_score",
- "sklearn.metrics.confusion_matrix", "sklearn.metrics.consensus_score",
- "sklearn.metrics.coverage_error", "sklearn.metrics.euclidean_distances",
- "sklearn.metrics.explained_variance_score", "sklearn.metrics.f1_score",
- "sklearn.metrics.fbeta_score", "sklearn.metrics.fowlkes_mallows_score",
- "sklearn.metrics.get_scorer", "sklearn.metrics.hamming_loss",
- "sklearn.metrics.hinge_loss", "sklearn.metrics.homogeneity_completeness_v_measure",
- "sklearn.metrics.homogeneity_score", "sklearn.metrics.jaccard_similarity_score",
- "sklearn.metrics.label_ranking_average_precision_score", "sklearn.metrics.label_ranking_loss",
- "sklearn.metrics.log_loss", "sklearn.metrics.make_scorer",
- "sklearn.metrics.matthews_corrcoef", "sklearn.metrics.mean_absolute_error",
- "sklearn.metrics.mean_squared_error", "sklearn.metrics.mean_squared_log_error",
- "sklearn.metrics.median_absolute_error", "sklearn.metrics.mutual_info_score",
- "sklearn.metrics.normalized_mutual_info_score", "sklearn.metrics.pairwise.KERNEL_PARAMS",
- "sklearn.metrics.pairwise.PAIRED_DISTANCES", "sklearn.metrics.pairwise.PAIRWISE_BOOLEAN_FUNCTIONS",
- "sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS", "sklearn.metrics.pairwise.PAIRWISE_KERNEL_FUNCTIONS",
- "sklearn.metrics.pairwise._VALID_METRICS", "sklearn.metrics.pairwise._chi2_kernel_fast",
- "sklearn.metrics.pairwise._pairwise_callable", "sklearn.metrics.pairwise._parallel_pairwise",
- "sklearn.metrics.pairwise._return_float_dtype", "sklearn.metrics.pairwise._sparse_manhattan",
- "sklearn.metrics.pairwise.additive_chi2_kernel", "sklearn.metrics.pairwise.check_paired_arrays",
- "sklearn.metrics.pairwise.check_pairwise_arrays", "sklearn.metrics.pairwise.chi2_kernel",
- "sklearn.metrics.pairwise.cosine_distances", "sklearn.metrics.pairwise.cosine_similarity",
- "sklearn.metrics.pairwise.distance_metrics", "sklearn.metrics.pairwise.euclidean_distances",
- "sklearn.metrics.pairwise.kernel_metrics", "sklearn.metrics.pairwise.laplacian_kernel",
- "sklearn.metrics.pairwise.linear_kernel", "sklearn.metrics.pairwise.manhattan_distances",
- "sklearn.metrics.pairwise.paired_cosine_distances", "sklearn.metrics.pairwise.paired_distances",
- "sklearn.metrics.pairwise.paired_euclidean_distances", "sklearn.metrics.pairwise.paired_manhattan_distances",
- "sklearn.metrics.pairwise.pairwise_distances", "sklearn.metrics.pairwise.pairwise_distances_argmin",
- "sklearn.metrics.pairwise.pairwise_distances_argmin_min", "sklearn.metrics.pairwise.pairwise_kernels",
- "sklearn.metrics.pairwise.polynomial_kernel", "sklearn.metrics.pairwise.rbf_kernel",
- "sklearn.metrics.pairwise.sigmoid_kernel", "sklearn.metrics.pairwise_distances",
- "sklearn.metrics.pairwise_distances_argmin", "sklearn.metrics.pairwise_distances_argmin_min",
- "sklearn.metrics.pairwise_fast._chi2_kernel_fast", "sklearn.metrics.pairwise_fast._sparse_manhattan",
- "sklearn.metrics.pairwise_kernels", "sklearn.metrics.precision_recall_curve",
- "sklearn.metrics.precision_recall_fscore_support", "sklearn.metrics.precision_score",
- "sklearn.metrics.r2_score", "sklearn.metrics.ranking._binary_clf_curve",
- "sklearn.metrics.ranking.auc", "sklearn.metrics.ranking.average_precision_score",
- "sklearn.metrics.ranking.coverage_error", "sklearn.metrics.ranking.label_ranking_average_precision_score",
- "sklearn.metrics.ranking.label_ranking_loss", "sklearn.metrics.ranking.precision_recall_curve",
- "sklearn.metrics.ranking.roc_auc_score", "sklearn.metrics.ranking.roc_curve",
- "sklearn.metrics.recall_score", "sklearn.metrics.regression._check_reg_targets",
- "sklearn.metrics.regression.explained_variance_score", "sklearn.metrics.regression.mean_absolute_error",
- "sklearn.metrics.regression.mean_squared_error", "sklearn.metrics.regression.mean_squared_log_error",
- "sklearn.metrics.regression.median_absolute_error", "sklearn.metrics.regression.r2_score",
- "sklearn.metrics.regression.string_types", "sklearn.metrics.roc_auc_score",
- "sklearn.metrics.roc_curve", "sklearn.metrics.scorer.SCORERS",
- "sklearn.metrics.scorer._BaseScorer", "sklearn.metrics.scorer._PredictScorer",
- "sklearn.metrics.scorer._ProbaScorer", "sklearn.metrics.scorer._ThresholdScorer",
- "sklearn.metrics.scorer._check_multimetric_scoring", "sklearn.metrics.scorer._passthrough_scorer",
- "sklearn.metrics.scorer.accuracy_scorer", "sklearn.metrics.scorer.adjusted_mutual_info_scorer",
- "sklearn.metrics.scorer.adjusted_rand_scorer", "sklearn.metrics.scorer.average",
- "sklearn.metrics.scorer.average_precision_scorer", "sklearn.metrics.scorer.check_scoring",
- "sklearn.metrics.scorer.completeness_scorer", "sklearn.metrics.scorer.deprecation_msg",
- "sklearn.metrics.scorer.explained_variance_scorer", "sklearn.metrics.scorer.f1_scorer",
- "sklearn.metrics.scorer.fowlkes_mallows_scorer", "sklearn.metrics.scorer.get_scorer",
- "sklearn.metrics.scorer.homogeneity_scorer", "sklearn.metrics.scorer.log_loss_scorer",
- "sklearn.metrics.scorer.make_scorer", "sklearn.metrics.scorer.mean_absolute_error_scorer",
- "sklearn.metrics.scorer.mean_squared_error_scorer", "sklearn.metrics.scorer.median_absolute_error_scorer",
- "sklearn.metrics.scorer.mutual_info_scorer", "sklearn.metrics.scorer.name",
- "sklearn.metrics.scorer.neg_log_loss_scorer", "sklearn.metrics.scorer.neg_mean_absolute_error_scorer",
- "sklearn.metrics.scorer.neg_mean_squared_error_scorer", "sklearn.metrics.scorer.neg_mean_squared_log_error_scorer",
- "sklearn.metrics.scorer.neg_median_absolute_error_scorer", "sklearn.metrics.scorer.normalized_mutual_info_scorer",
- "sklearn.metrics.scorer.precision_scorer", "sklearn.metrics.scorer.qualified_name",
- "sklearn.metrics.scorer.r2_scorer", "sklearn.metrics.scorer.recall_scorer",
- "sklearn.metrics.scorer.roc_auc_scorer", "sklearn.metrics.scorer.v_measure_scorer",
- "sklearn.metrics.silhouette_samples", "sklearn.metrics.silhouette_score",
- "sklearn.metrics.v_measure_score", "sklearn.metrics.zero_one_loss",
- "sklearn.model_selection.BaseCrossValidator", "sklearn.model_selection.GridSearchCV",
- "sklearn.model_selection.GroupKFold", "sklearn.model_selection.GroupShuffleSplit",
- "sklearn.model_selection.KFold", "sklearn.model_selection.LeaveOneGroupOut",
- "sklearn.model_selection.LeaveOneOut", "sklearn.model_selection.LeavePGroupsOut",
- "sklearn.model_selection.LeavePOut", "sklearn.model_selection.ParameterGrid",
- "sklearn.model_selection.ParameterSampler", "sklearn.model_selection.PredefinedSplit",
- "sklearn.model_selection.RandomizedSearchCV", "sklearn.model_selection.RepeatedKFold",
- "sklearn.model_selection.RepeatedStratifiedKFold", "sklearn.model_selection.ShuffleSplit",
- "sklearn.model_selection.StratifiedKFold", "sklearn.model_selection.StratifiedShuffleSplit",
- "sklearn.model_selection.TimeSeriesSplit", "sklearn.model_selection._search.BaseSearchCV",
- "sklearn.model_selection._search.GridSearchCV", "sklearn.model_selection._search.ParameterGrid",
- "sklearn.model_selection._search.ParameterSampler", "sklearn.model_selection._search.RandomizedSearchCV",
- "sklearn.model_selection._search._CVScoreTuple", "sklearn.model_selection._search._check_param_grid",
- "sklearn.model_selection._search.fit_grid_point", "sklearn.model_selection._search.sp_version",
- "sklearn.model_selection._split.BaseCrossValidator", "sklearn.model_selection._split.BaseShuffleSplit",
- "sklearn.model_selection._split.GroupKFold", "sklearn.model_selection._split.GroupShuffleSplit",
- "sklearn.model_selection._split.KFold", "sklearn.model_selection._split.LeaveOneGroupOut",
- "sklearn.model_selection._split.LeaveOneOut", "sklearn.model_selection._split.LeavePGroupsOut",
- "sklearn.model_selection._split.LeavePOut", "sklearn.model_selection._split.PredefinedSplit",
- "sklearn.model_selection._split.RepeatedKFold", "sklearn.model_selection._split.RepeatedStratifiedKFold",
- "sklearn.model_selection._split.ShuffleSplit", "sklearn.model_selection._split.StratifiedKFold",
- "sklearn.model_selection._split.StratifiedShuffleSplit", "sklearn.model_selection._split.TimeSeriesSplit",
- "sklearn.model_selection._split._BaseKFold", "sklearn.model_selection._split._CVIterableWrapper",
- "sklearn.model_selection._split._RepeatedSplits", "sklearn.model_selection._split._approximate_mode",
- "sklearn.model_selection._split._build_repr", "sklearn.model_selection._split._validate_shuffle_split",
- "sklearn.model_selection._split._validate_shuffle_split_init", "sklearn.model_selection._split.check_cv",
- "sklearn.model_selection._split.train_test_split", "sklearn.model_selection._validation._aggregate_score_dicts",
- "sklearn.model_selection._validation._check_is_permutation", "sklearn.model_selection._validation._fit_and_predict",
- "sklearn.model_selection._validation._fit_and_score", "sklearn.model_selection._validation._incremental_fit_estimator",
- "sklearn.model_selection._validation._index_param_value", "sklearn.model_selection._validation._multimetric_score",
- "sklearn.model_selection._validation._permutation_test_score", "sklearn.model_selection._validation._score",
- "sklearn.model_selection._validation._shuffle", "sklearn.model_selection._validation._translate_train_sizes",
- "sklearn.model_selection._validation.cross_val_predict", "sklearn.model_selection._validation.cross_val_score",
- "sklearn.model_selection._validation.cross_validate", "sklearn.model_selection._validation.learning_curve",
- "sklearn.model_selection._validation.permutation_test_score", "sklearn.model_selection._validation.validation_curve",
- "sklearn.model_selection.check_cv", "sklearn.model_selection.cross_val_predict",
- "sklearn.model_selection.cross_val_score", "sklearn.model_selection.cross_validate",
- "sklearn.model_selection.fit_grid_point", "sklearn.model_selection.learning_curve",
- "sklearn.model_selection.permutation_test_score", "sklearn.model_selection.train_test_split",
- "sklearn.model_selection.validation_curve", "sklearn.multiclass.OneVsOneClassifier",
- "sklearn.multiclass.OneVsRestClassifier", "sklearn.multiclass.OutputCodeClassifier",
- "sklearn.multiclass._ConstantPredictor", "sklearn.multiclass._check_estimator",
- "sklearn.multiclass._fit_binary", "sklearn.multiclass._fit_ovo_binary",
- "sklearn.multiclass._partial_fit_binary", "sklearn.multiclass._partial_fit_ovo_binary",
- "sklearn.multiclass._predict_binary", "sklearn.naive_bayes.BaseDiscreteNB",
- "sklearn.naive_bayes.BaseNB", "sklearn.naive_bayes.BernoulliNB",
- "sklearn.naive_bayes.GaussianNB", "sklearn.naive_bayes.MultinomialNB",
- "sklearn.naive_bayes._ALPHA_MIN", "sklearn.neighbors.BallTree",
- "sklearn.neighbors.DistanceMetric", "sklearn.neighbors.KDTree",
- "sklearn.neighbors.KNeighborsClassifier", "sklearn.neighbors.KNeighborsRegressor",
- "sklearn.neighbors.KernelDensity", "sklearn.neighbors.LSHForest",
- "sklearn.neighbors.LocalOutlierFactor", "sklearn.neighbors.NearestCentroid",
- "sklearn.neighbors.NearestNeighbors", "sklearn.neighbors.RadiusNeighborsClassifier",
- "sklearn.neighbors.RadiusNeighborsRegressor", "sklearn.neighbors.approximate.GaussianRandomProjectionHash",
- "sklearn.neighbors.approximate.HASH_DTYPE", "sklearn.neighbors.approximate.LSHForest",
- "sklearn.neighbors.approximate.MAX_HASH_SIZE", "sklearn.neighbors.approximate.ProjectionToHashMixin",
- "sklearn.neighbors.approximate._array_of_arrays", "sklearn.neighbors.approximate._find_longest_prefix_match",
- "sklearn.neighbors.approximate._find_matching_indices", "sklearn.neighbors.ball_tree.BallTree",
- "sklearn.neighbors.ball_tree.BinaryTree", "sklearn.neighbors.ball_tree.CLASS_DOC",
- "sklearn.neighbors.ball_tree.DOC_DICT", "sklearn.neighbors.ball_tree.NeighborsHeap",
- "sklearn.neighbors.ball_tree.NodeData", "sklearn.neighbors.ball_tree.NodeHeap",
- "sklearn.neighbors.ball_tree.NodeHeapData", "sklearn.neighbors.ball_tree.VALID_METRICS",
- "sklearn.neighbors.ball_tree.VALID_METRIC_IDS", "sklearn.neighbors.ball_tree.kernel_norm",
- "sklearn.neighbors.ball_tree.load_heap", "sklearn.neighbors.ball_tree.newObj",
- "sklearn.neighbors.ball_tree.nodeheap_sort", "sklearn.neighbors.ball_tree.offsets",
- "sklearn.neighbors.ball_tree.simultaneous_sort", "sklearn.neighbors.base.KNeighborsMixin",
- "sklearn.neighbors.base.NeighborsBase", "sklearn.neighbors.base.PAIRWISE_DISTANCE_FUNCTIONS",
- "sklearn.neighbors.base.RadiusNeighborsMixin", "sklearn.neighbors.base.SupervisedFloatMixin",
- "sklearn.neighbors.base.SupervisedIntegerMixin", "sklearn.neighbors.base.UnsupervisedMixin",
- "sklearn.neighbors.base.VALID_METRICS", "sklearn.neighbors.base.VALID_METRICS_SPARSE",
- "sklearn.neighbors.base._check_weights", "sklearn.neighbors.base._get_weights",
- "sklearn.neighbors.classification.KNeighborsClassifier", "sklearn.neighbors.classification.RadiusNeighborsClassifier",
- "sklearn.neighbors.dist_metrics.BrayCurtisDistance", "sklearn.neighbors.dist_metrics.CanberraDistance",
- "sklearn.neighbors.dist_metrics.ChebyshevDistance", "sklearn.neighbors.dist_metrics.DiceDistance",
- "sklearn.neighbors.dist_metrics.DistanceMetric", "sklearn.neighbors.dist_metrics.EuclideanDistance",
- "sklearn.neighbors.dist_metrics.HammingDistance", "sklearn.neighbors.dist_metrics.HaversineDistance",
- "sklearn.neighbors.dist_metrics.JaccardDistance", "sklearn.neighbors.dist_metrics.KulsinskiDistance",
- "sklearn.neighbors.dist_metrics.METRIC_MAPPING", "sklearn.neighbors.dist_metrics.MahalanobisDistance",
- "sklearn.neighbors.dist_metrics.ManhattanDistance", "sklearn.neighbors.dist_metrics.MatchingDistance",
- "sklearn.neighbors.dist_metrics.MinkowskiDistance", "sklearn.neighbors.dist_metrics.PyFuncDistance",
- "sklearn.neighbors.dist_metrics.RogersTanimotoDistance", "sklearn.neighbors.dist_metrics.RussellRaoDistance",
- "sklearn.neighbors.dist_metrics.SEuclideanDistance", "sklearn.neighbors.dist_metrics.SokalMichenerDistance",
- "sklearn.neighbors.dist_metrics.SokalSneathDistance", "sklearn.neighbors.dist_metrics.WMinkowskiDistance",
- "sklearn.neighbors.dist_metrics.get_valid_metric_ids", "sklearn.neighbors.dist_metrics.newObj",
- "sklearn.neighbors.graph._check_params", "sklearn.neighbors.graph._query_include_self",
- "sklearn.neighbors.graph.kneighbors_graph", "sklearn.neighbors.graph.radius_neighbors_graph",
- "sklearn.neighbors.kd_tree.BinaryTree", "sklearn.neighbors.kd_tree.CLASS_DOC",
- "sklearn.neighbors.kd_tree.DOC_DICT", "sklearn.neighbors.kd_tree.KDTree",
- "sklearn.neighbors.kd_tree.NeighborsHeap", "sklearn.neighbors.kd_tree.NodeData",
- "sklearn.neighbors.kd_tree.NodeHeap", "sklearn.neighbors.kd_tree.NodeHeapData",
- "sklearn.neighbors.kd_tree.VALID_METRICS", "sklearn.neighbors.kd_tree.VALID_METRIC_IDS",
- "sklearn.neighbors.kd_tree.kernel_norm", "sklearn.neighbors.kd_tree.load_heap",
- "sklearn.neighbors.kd_tree.newObj", "sklearn.neighbors.kd_tree.nodeheap_sort",
- "sklearn.neighbors.kd_tree.offsets", "sklearn.neighbors.kd_tree.simultaneous_sort",
- "sklearn.neighbors.kde.KernelDensity", "sklearn.neighbors.kde.TREE_DICT",
- "sklearn.neighbors.kde.VALID_KERNELS", "sklearn.neighbors.kde.gammainc",
- "sklearn.neighbors.kneighbors_graph", "sklearn.neighbors.lof.LocalOutlierFactor",
- "sklearn.neighbors.nearest_centroid.NearestCentroid", "sklearn.neighbors.quad_tree.CELL_DTYPE",
- "sklearn.neighbors.quad_tree._QuadTree", "sklearn.neighbors.radius_neighbors_graph",
- "sklearn.neighbors.regression.KNeighborsRegressor", "sklearn.neighbors.regression.RadiusNeighborsRegressor",
- "sklearn.neighbors.unsupervised.NearestNeighbors", "sklearn.pipeline.FeatureUnion",
- "sklearn.pipeline.Pipeline", "sklearn.pipeline._fit_one_transformer",
- "sklearn.pipeline._fit_transform_one", "sklearn.pipeline._name_estimators",
- "sklearn.pipeline._transform_one", "sklearn.pipeline.make_pipeline",
- "sklearn.pipeline.make_union", "sklearn.preprocessing.Binarizer",
- "sklearn.preprocessing.FunctionTransformer", "sklearn.preprocessing.Imputer",
- "sklearn.preprocessing.KernelCenterer", "sklearn.preprocessing.LabelBinarizer",
- "sklearn.preprocessing.LabelEncoder", "sklearn.preprocessing.MaxAbsScaler",
- "sklearn.preprocessing.MinMaxScaler", "sklearn.preprocessing.MultiLabelBinarizer",
- "sklearn.preprocessing.Normalizer", "sklearn.preprocessing.OneHotEncoder",
- "sklearn.preprocessing.PolynomialFeatures", "sklearn.preprocessing.QuantileTransformer",
- "sklearn.preprocessing.RobustScaler", "sklearn.preprocessing.StandardScaler",
- "sklearn.preprocessing._function_transformer.FunctionTransformer", "sklearn.preprocessing._function_transformer._identity",
- "sklearn.preprocessing._function_transformer.string_types", "sklearn.preprocessing.add_dummy_feature",
- "sklearn.preprocessing.binarize", "sklearn.preprocessing.data.BOUNDS_THRESHOLD",
- "sklearn.preprocessing.data.Binarizer", "sklearn.preprocessing.data.FLOAT_DTYPES",
- "sklearn.preprocessing.data.KernelCenterer", "sklearn.preprocessing.data.MaxAbsScaler",
- "sklearn.preprocessing.data.MinMaxScaler", "sklearn.preprocessing.data.Normalizer",
- "sklearn.preprocessing.data.OneHotEncoder", "sklearn.preprocessing.data.PolynomialFeatures",
- "sklearn.preprocessing.data.QuantileTransformer", "sklearn.preprocessing.data.RobustScaler",
- "sklearn.preprocessing.data.StandardScaler", "sklearn.preprocessing.data._handle_zeros_in_scale",
- "sklearn.preprocessing.data._transform_selected", "sklearn.preprocessing.data.add_dummy_feature",
- "sklearn.preprocessing.data.binarize", "sklearn.preprocessing.data.maxabs_scale",
- "sklearn.preprocessing.data.minmax_scale", "sklearn.preprocessing.data.normalize",
- "sklearn.preprocessing.data.quantile_transform", "sklearn.preprocessing.data.robust_scale",
- "sklearn.preprocessing.data.scale", "sklearn.preprocessing.data.string_types",
- "sklearn.preprocessing.imputation.FLOAT_DTYPES", "sklearn.preprocessing.imputation.Imputer",
- "sklearn.preprocessing.imputation._get_mask", "sklearn.preprocessing.imputation._most_frequent",
- "sklearn.preprocessing.label.LabelBinarizer", "sklearn.preprocessing.label.LabelEncoder",
- "sklearn.preprocessing.label.MultiLabelBinarizer", "sklearn.preprocessing.label._inverse_binarize_multiclass",
- "sklearn.preprocessing.label._inverse_binarize_thresholding", "sklearn.preprocessing.label.label_binarize",
- "sklearn.preprocessing.label_binarize", "sklearn.preprocessing.maxabs_scale",
- "sklearn.preprocessing.minmax_scale", "sklearn.preprocessing.normalize",
- "sklearn.preprocessing.quantile_transform", "sklearn.preprocessing.robust_scale",
- "sklearn.preprocessing.scale", "sklearn.random_projection.BaseRandomProjection",
- "sklearn.random_projection.GaussianRandomProjection", "sklearn.random_projection.SparseRandomProjection",
- "sklearn.random_projection._check_density", "sklearn.random_projection._check_input_size",
- "sklearn.random_projection.gaussian_random_matrix", "sklearn.random_projection.johnson_lindenstrauss_min_dim",
- "sklearn.random_projection.sparse_random_matrix", "sklearn.set_config",
- "sklearn.setup_module", "sklearn.svm.LinearSVC",
- "sklearn.svm.LinearSVR", "sklearn.svm.NuSVC",
- "sklearn.svm.NuSVR", "sklearn.svm.OneClassSVM",
- "sklearn.svm.SVC", "sklearn.svm.SVR",
- "sklearn.svm.base.BaseLibSVM", "sklearn.svm.base.BaseSVC",
- "sklearn.svm.base.LIBSVM_IMPL", "sklearn.svm.base._fit_liblinear",
- "sklearn.svm.base._get_liblinear_solver_type", "sklearn.svm.base._one_vs_one_coef",
- "sklearn.svm.bounds.l1_min_c", "sklearn.svm.classes.LinearSVC",
- "sklearn.svm.classes.LinearSVR", "sklearn.svm.classes.NuSVC",
- "sklearn.svm.classes.NuSVR", "sklearn.svm.classes.OneClassSVM",
- "sklearn.svm.classes.SVC", "sklearn.svm.classes.SVR",
- "sklearn.svm.l1_min_c", "sklearn.svm.liblinear.set_verbosity_wrap",
- "sklearn.svm.liblinear.train_wrap", "sklearn.svm.libsvm.LIBSVM_KERNEL_TYPES",
- "sklearn.svm.libsvm.cross_validation", "sklearn.svm.libsvm.decision_function",
- "sklearn.svm.libsvm.fit", "sklearn.svm.libsvm.predict",
- "sklearn.svm.libsvm.predict_proba", "sklearn.svm.libsvm.set_verbosity_wrap",
- "sklearn.svm.libsvm_sparse.libsvm_sparse_decision_function", "sklearn.svm.libsvm_sparse.libsvm_sparse_predict",
- "sklearn.svm.libsvm_sparse.libsvm_sparse_predict_proba", "sklearn.svm.libsvm_sparse.libsvm_sparse_train",
- "sklearn.svm.libsvm_sparse.set_verbosity_wrap", "sklearn.tree.DecisionTreeClassifier",
- "sklearn.tree.DecisionTreeRegressor", "sklearn.tree.ExtraTreeClassifier",
- "sklearn.tree.ExtraTreeRegressor", "sklearn.tree._criterion.ClassificationCriterion",
- "sklearn.tree._criterion.Criterion", "sklearn.tree._criterion.Entropy",
- "sklearn.tree._criterion.FriedmanMSE", "sklearn.tree._criterion.Gini",
- "sklearn.tree._criterion.MAE", "sklearn.tree._criterion.MSE",
- "sklearn.tree._criterion.RegressionCriterion", "sklearn.tree._splitter.BaseDenseSplitter",
- "sklearn.tree._splitter.BaseSparseSplitter", "sklearn.tree._splitter.BestSparseSplitter",
- "sklearn.tree._splitter.BestSplitter", "sklearn.tree._splitter.RandomSparseSplitter",
- "sklearn.tree._splitter.RandomSplitter", "sklearn.tree._splitter.Splitter",
- "sklearn.tree._tree.BestFirstTreeBuilder", "sklearn.tree._tree.DepthFirstTreeBuilder",
- "sklearn.tree._tree.NODE_DTYPE", "sklearn.tree._tree.TREE_LEAF",
- "sklearn.tree._tree.TREE_UNDEFINED", "sklearn.tree._tree.Tree",
- "sklearn.tree._tree.TreeBuilder", "sklearn.tree._utils.PriorityHeap",
- "sklearn.tree._utils.Stack", "sklearn.tree._utils.WeightedMedianCalculator",
- "sklearn.tree._utils.WeightedPQueue", "sklearn.tree._utils._realloc_test",
- "sklearn.tree.export.SENTINEL", "sklearn.tree.export.Sentinel",
- "sklearn.tree.export._color_brew", "sklearn.tree.export.export_graphviz",
- "sklearn.tree.export_graphviz", "sklearn.tree.tree.BaseDecisionTree",
- "sklearn.tree.tree.CRITERIA_CLF", "sklearn.tree.tree.CRITERIA_REG",
- "sklearn.tree.tree.DENSE_SPLITTERS", "sklearn.tree.tree.DecisionTreeClassifier",
- "sklearn.tree.tree.DecisionTreeRegressor", "sklearn.tree.tree.ExtraTreeClassifier",
- "sklearn.tree.tree.ExtraTreeRegressor", "sklearn.tree.tree.SPARSE_SPLITTERS",
- "sklearn.utils.Bunch", "sklearn.utils._get_n_jobs",
- "sklearn.utils._logistic_sigmoid._log_logistic_sigmoid", "sklearn.utils._random._sample_without_replacement_check_input",
- "sklearn.utils._random._sample_without_replacement_with_pool", "sklearn.utils._random._sample_without_replacement_with_reservoir_sampling",
- "sklearn.utils._random._sample_without_replacement_with_tracking_selection", "sklearn.utils._random.sample_without_replacement",
- "sklearn.utils.arrayfuncs.cholesky_delete", "sklearn.utils.arrayfuncs.min_pos",
- "sklearn.utils.as_float_array", "sklearn.utils.assert_all_finite",
- "sklearn.utils.axis0_safe_slice", "sklearn.utils.check_X_y",
- "sklearn.utils.check_array", "sklearn.utils.check_consistent_length",
- "sklearn.utils.check_random_state", "sklearn.utils.check_symmetric",
- "sklearn.utils.class_weight.compute_class_weight", "sklearn.utils.class_weight.compute_sample_weight",
- "sklearn.utils.column_or_1d", "sklearn.utils.compute_class_weight",
- "sklearn.utils.compute_sample_weight", "sklearn.utils.deprecated",
- "sklearn.utils.deprecation.DeprecationDict", "sklearn.utils.deprecation._is_deprecated",
- "sklearn.utils.deprecation.deprecated", "sklearn.utils.extmath._deterministic_vector_sign_flip",
- "sklearn.utils.extmath._impose_f_order", "sklearn.utils.extmath._incremental_mean_and_var",
- "sklearn.utils.extmath.cartesian", "sklearn.utils.extmath.density",
- "sklearn.utils.extmath.fast_dot", "sklearn.utils.extmath.fast_logdet",
- "sklearn.utils.extmath.log_logistic", "sklearn.utils.extmath.logsumexp",
- "sklearn.utils.extmath.make_nonnegative", "sklearn.utils.extmath.norm",
- "sklearn.utils.extmath.np_version", "sklearn.utils.extmath.pinvh",
- "sklearn.utils.extmath.randomized_range_finder", "sklearn.utils.extmath.randomized_svd",
- "sklearn.utils.extmath.row_norms", "sklearn.utils.extmath.safe_min",
- "sklearn.utils.extmath.safe_sparse_dot", "sklearn.utils.extmath.softmax",
- "sklearn.utils.extmath.squared_norm", "sklearn.utils.extmath.stable_cumsum",
- "sklearn.utils.extmath.svd_flip", "sklearn.utils.extmath.weighted_mode",
- "sklearn.utils.fast_dict.IntFloatDict", "sklearn.utils.fast_dict.argmin",
- "sklearn.utils.fixes._parse_version", "sklearn.utils.fixes.divide",
- "sklearn.utils.fixes.euler_gamma", "sklearn.utils.fixes.makedirs",
- "sklearn.utils.fixes.np_version", "sklearn.utils.fixes.parallel_helper",
- "sklearn.utils.fixes.sp_version", "sklearn.utils.fixes.sparse_min_max",
- "sklearn.utils.gen_batches", "sklearn.utils.gen_even_slices",
- "sklearn.utils.graph.connected_components", "sklearn.utils.graph.graph_laplacian",
- "sklearn.utils.graph.graph_shortest_path", "sklearn.utils.graph.single_source_shortest_path_length",
- "sklearn.utils.graph_shortest_path.graph_shortest_path", "sklearn.utils.indexable",
- "sklearn.utils.indices_to_mask", "sklearn.utils.linear_assignment_._HungarianState",
- "sklearn.utils.linear_assignment_._hungarian", "sklearn.utils.linear_assignment_._step1",
- "sklearn.utils.linear_assignment_._step3", "sklearn.utils.linear_assignment_._step4",
- "sklearn.utils.linear_assignment_._step5", "sklearn.utils.linear_assignment_._step6",
- "sklearn.utils.linear_assignment_.linear_assignment", "sklearn.utils.metaestimators._BaseComposition",
- "sklearn.utils.metaestimators._IffHasAttrDescriptor", "sklearn.utils.metaestimators._safe_split",
- "sklearn.utils.metaestimators.if_delegate_has_method", "sklearn.utils.multiclass._FN_UNIQUE_LABELS",
- "sklearn.utils.multiclass._check_partial_fit_first_call", "sklearn.utils.multiclass._is_integral_float",
- "sklearn.utils.multiclass._ovr_decision_function", "sklearn.utils.multiclass._unique_indicator",
- "sklearn.utils.multiclass._unique_multiclass", "sklearn.utils.multiclass.check_classification_targets",
- "sklearn.utils.multiclass.class_distribution", "sklearn.utils.multiclass.is_multilabel",
- "sklearn.utils.multiclass.string_types", "sklearn.utils.multiclass.type_of_target",
- "sklearn.utils.multiclass.unique_labels", "sklearn.utils.murmurhash.murmurhash3_32",
- "sklearn.utils.murmurhash.murmurhash3_bytes_array_s32", "sklearn.utils.murmurhash.murmurhash3_bytes_array_u32",
- "sklearn.utils.murmurhash.murmurhash3_bytes_s32", "sklearn.utils.murmurhash.murmurhash3_bytes_u32",
- "sklearn.utils.murmurhash.murmurhash3_int_s32", "sklearn.utils.murmurhash.murmurhash3_int_u32",
- "sklearn.utils.murmurhash3_32", "sklearn.utils.optimize._LineSearchError",
- "sklearn.utils.optimize._cg", "sklearn.utils.optimize._line_search_wolfe12",
- "sklearn.utils.optimize.newton_cg", "sklearn.utils.random.choice",
- "sklearn.utils.random.random_choice_csc", "sklearn.utils.resample",
- "sklearn.utils.safe_indexing", "sklearn.utils.safe_mask",
- "sklearn.utils.safe_sqr", "sklearn.utils.seq_dataset.ArrayDataset",
- "sklearn.utils.seq_dataset.CSRDataset", "sklearn.utils.seq_dataset.SequentialDataset",
- "sklearn.utils.shuffle", "sklearn.utils.sparsefuncs._csc_mean_var_axis0",
- "sklearn.utils.sparsefuncs._csr_mean_var_axis0", "sklearn.utils.sparsefuncs._get_elem_at_rank",
- "sklearn.utils.sparsefuncs._get_median", "sklearn.utils.sparsefuncs._incr_mean_var_axis0",
- "sklearn.utils.sparsefuncs._raise_error_wrong_axis", "sklearn.utils.sparsefuncs._raise_typeerror",
- "sklearn.utils.sparsefuncs.count_nonzero", "sklearn.utils.sparsefuncs.csc_median_axis_0",
- "sklearn.utils.sparsefuncs.incr_mean_variance_axis", "sklearn.utils.sparsefuncs.inplace_column_scale",
- "sklearn.utils.sparsefuncs.inplace_csr_column_scale", "sklearn.utils.sparsefuncs.inplace_csr_row_scale",
- "sklearn.utils.sparsefuncs.inplace_row_scale", "sklearn.utils.sparsefuncs.inplace_swap_column",
- "sklearn.utils.sparsefuncs.inplace_swap_row", "sklearn.utils.sparsefuncs.inplace_swap_row_csc",
- "sklearn.utils.sparsefuncs.inplace_swap_row_csr", "sklearn.utils.sparsefuncs.mean_variance_axis",
- "sklearn.utils.sparsefuncs.min_max_axis", "sklearn.utils.sparsefuncs_fast._csc_mean_variance_axis0",
- "sklearn.utils.sparsefuncs_fast._csr_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast._csr_row_norms",
- "sklearn.utils.sparsefuncs_fast._incr_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast._inplace_csr_row_normalize_l1",
- "sklearn.utils.sparsefuncs_fast._inplace_csr_row_normalize_l2", "sklearn.utils.sparsefuncs_fast.assign_rows_csr",
- "sklearn.utils.sparsefuncs_fast.csc_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast.csr_mean_variance_axis0",
- "sklearn.utils.sparsefuncs_fast.csr_row_norms", "sklearn.utils.sparsefuncs_fast.incr_mean_variance_axis0",
- "sklearn.utils.sparsefuncs_fast.inplace_csr_row_normalize_l1", "sklearn.utils.sparsefuncs_fast.inplace_csr_row_normalize_l2",
- "sklearn.utils.stats._weighted_percentile", "sklearn.utils.stats.rankdata",
- "sklearn.utils.tosequence", "sklearn.utils.validation.FLOAT_DTYPES",
- "sklearn.utils.validation._assert_all_finite", "sklearn.utils.validation._ensure_sparse_format",
- "sklearn.utils.validation._is_arraylike", "sklearn.utils.validation._num_samples",
- "sklearn.utils.validation._shape_repr", "sklearn.utils.validation.as_float_array",
- "sklearn.utils.validation.assert_all_finite", "sklearn.utils.validation.check_X_y",
- "sklearn.utils.validation.check_array", "sklearn.utils.validation.check_consistent_length",
- "sklearn.utils.validation.check_is_fitted", "sklearn.utils.validation.check_memory",
- "sklearn.utils.validation.check_non_negative", "sklearn.utils.validation.check_random_state",
- "sklearn.utils.validation.check_symmetric", "sklearn.utils.validation.column_or_1d",
- "sklearn.utils.validation.has_fit_parameter", "sklearn.utils.validation.indexable",
- "sklearn.utils.weight_vector.WeightVector"
-],
-
- "SKR_NAMES": [
- "skrebate.MultiSURF", "skrebate.MultiSURFstar",
- "skrebate.ReliefF", "skrebate.SURF",
- "skrebate.SURFstar", "skrebate.TuRF",
- "skrebate.multisurf.MultiSURF", "skrebate.multisurfstar.MultiSURFstar",
- "skrebate.relieff.ReliefF", "skrebate.scoring_utils.MultiSURF_compute_scores",
- "skrebate.scoring_utils.MultiSURFstar_compute_scores", "skrebate.scoring_utils.ReliefF_compute_scores",
- "skrebate.scoring_utils.SURF_compute_scores", "skrebate.scoring_utils.SURFstar_compute_scores",
- "skrebate.scoring_utils.compute_score", "skrebate.scoring_utils.get_row_missing",
- "skrebate.scoring_utils.ramp_function", "skrebate.surf.SURF",
- "skrebate.surfstar.SURFstar", "skrebate.turf.TuRF"
- ],
-
- "XGB_NAMES": [
- "xgboost.Booster", "xgboost.DMatrix",
- "xgboost.VERSION_FILE", "xgboost.XGBClassifier",
- "xgboost.XGBModel", "xgboost.XGBRegressor",
- "xgboost.callback._fmt_metric", "xgboost.callback._get_callback_context",
- "xgboost.callback.early_stop", "xgboost.callback.print_evaluation",
- "xgboost.callback.record_evaluation", "xgboost.callback.reset_learning_rate",
- "xgboost.compat.PANDAS_INSTALLED", "xgboost.compat.PY3",
- "xgboost.compat.SKLEARN_INSTALLED", "xgboost.compat.STRING_TYPES",
- "xgboost.compat.py_str", "xgboost.core.Booster",
- "xgboost.core.CallbackEnv", "xgboost.core.DMatrix",
- "xgboost.core.EarlyStopException", "xgboost.core.PANDAS_DTYPE_MAPPER",
- "xgboost.core.PANDAS_INSTALLED", "xgboost.core.PY3",
- "xgboost.core.STRING_TYPES", "xgboost.core.XGBoostError",
- "xgboost.core._check_call", "xgboost.core._load_lib",
- "xgboost.core._maybe_pandas_data", "xgboost.core._maybe_pandas_label",
- "xgboost.core.c_array", "xgboost.core.c_str",
- "xgboost.core.ctypes2buffer", "xgboost.core.ctypes2numpy",
- "xgboost.core.from_cstr_to_pystr", "xgboost.core.from_pystr_to_cstr",
- "xgboost.cv", "xgboost.f",
- "xgboost.libpath.XGBoostLibraryNotFound", "xgboost.libpath.find_lib_path",
- "xgboost.plot_importance", "xgboost.plot_tree",
- "xgboost.plotting._EDGEPAT", "xgboost.plotting._EDGEPAT2",
- "xgboost.plotting._LEAFPAT", "xgboost.plotting._NODEPAT",
- "xgboost.plotting._parse_edge", "xgboost.plotting._parse_node",
- "xgboost.plotting.plot_importance", "xgboost.plotting.plot_tree",
- "xgboost.plotting.to_graphviz", "xgboost.rabit.DTYPE_ENUM__",
- "xgboost.rabit.STRING_TYPES", "xgboost.rabit._init_rabit",
- "xgboost.rabit.allreduce", "xgboost.rabit.broadcast",
- "xgboost.rabit.finalize", "xgboost.rabit.get_processor_name",
- "xgboost.rabit.get_rank", "xgboost.rabit.get_world_size",
- "xgboost.rabit.init", "xgboost.rabit.tracker_print",
- "xgboost.rabit.version_number", "xgboost.sklearn.SKLEARN_INSTALLED",
- "xgboost.sklearn.XGBClassifier", "xgboost.sklearn.XGBModel",
- "xgboost.sklearn.XGBRegressor", "xgboost.sklearn._objective_decorator",
- "xgboost.to_graphviz", "xgboost.train",
- "xgboost.training.CVPack", "xgboost.training.SKLEARN_INSTALLED",
- "xgboost.training.STRING_TYPES", "xgboost.training._train_internal",
- "xgboost.training.aggcv", "xgboost.training.cv",
- "xgboost.training.mknfold", "xgboost.training.train"
- ],
-
-
- "NUMPY_NAMES": [
- "numpy.core.multiarray._reconstruct", "numpy.ndarray",
- "numpy.dtype", "numpy.core.multiarray.scalar",
- "numpy.random.__RandomState_ctor"
- ],
-
- "IMBLEARN_NAMES":[
- "imblearn.pipeline.Pipeline", "imblearn.over_sampling._random_over_sampler.RandomOverSampler",
- "imblearn.under_sampling._prototype_selection._edited_nearest_neighbours.EditedNearestNeighbours"
- ]
-}
\ No newline at end of file
diff -r 39ae276e75d9 -r e94395c672bd stacking_ensembles.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/stacking_ensembles.py Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,128 @@
+import argparse
+import json
+import pandas as pd
+import pickle
+import xgboost
+import warnings
+from sklearn import (cluster, compose, decomposition, ensemble,
+ feature_extraction, feature_selection,
+ gaussian_process, kernel_approximation, metrics,
+ model_selection, naive_bayes, neighbors,
+ pipeline, preprocessing, svm, linear_model,
+ tree, discriminant_analysis)
+from sklearn.model_selection._split import check_cv
+from feature_selectors import (DyRFE, DyRFECV,
+ MyPipeline, MyimbPipeline)
+from iraps_classifier import (IRAPSCore, IRAPSClassifier,
+ BinarizeTargetClassifier,
+ BinarizeTargetRegressor)
+from preprocessors import Z_RandomOverSampler
+from utils import load_model, get_cv, get_estimator, get_search_params
+
+from mlxtend.regressor import StackingCVRegressor, StackingRegressor
+from mlxtend.classifier import StackingCVClassifier, StackingClassifier
+
+
+warnings.filterwarnings('ignore')  # suppress every warning for the whole script run
+
+N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))  # worker count from the GALAXY_SLOTS env var; 1 if unset
+
+
+def main(inputs_path, output_obj, base_paths=None, meta_path=None,
+ outfile_params=None):
+ """Assemble an mlxtend stacking ensemble and pickle it to a file.
+
+ Parameters
+ ----------
+ inputs_path : str
+ File path of the JSON file holding the Galaxy tool parameters.
+ output_obj : str
+ File path the pickled ensemble estimator is written to.
+ base_paths : str
+ Base estimator file path, or several paths joined by commas. An entry
+ that is empty or 'None' is built from params['base_est_builder'].
+ meta_path : str
+ File path of the meta estimator; if falsy, the meta estimator is
+ built from params['meta_estimator'] instead.
+ outfile_params : str
+ File path for the tab-separated params output, written only when
+ params['get_params'] is truthy.
+ """
+ with open(inputs_path, 'r') as param_handler:
+ params = json.load(param_handler)  # Galaxy tool parameters
+
+ base_estimators = []
+ for idx, base_file in enumerate(base_paths.split(',')):  # NOTE(review): the None default of base_paths would fail on .split()
+ if base_file and base_file != 'None':  # a real path: load the saved estimator
+ with open(base_file, 'rb') as handler:
+ model = load_model(handler)
+ else:  # no file for this position: build from the idx-th builder entry
+ estimator_json = (params['base_est_builder'][idx]
+ ['estimator_selector'])
+ model = get_estimator(estimator_json)
+ base_estimators.append(model)
+
+ if meta_path:  # same choice for the meta estimator: saved file or tool parameters
+ with open(meta_path, 'rb') as f:
+ meta_estimator = load_model(f)
+ else:
+ estimator_json = params['meta_estimator']['estimator_selector']
+ meta_estimator = get_estimator(estimator_json)
+
+ options = params['algo_selection']['options']  # forwarded as keyword arguments to the ensemble constructor
+
+ cv_selector = options.pop('cv_selector', None)  # removed from options; turned into 'cv' below when set
+ if cv_selector:
+ splitter, groups = get_cv(cv_selector)  # groups is not used further in this function
+ options['cv'] = splitter
+ # set n_jobs from N_JOBS (derived from GALAXY_SLOTS)
+ options['n_jobs'] = N_JOBS
+
+ if params['algo_selection']['estimator_type'] == 'StackingCVClassifier':
+ ensemble_estimator = StackingCVClassifier(
+ classifiers=base_estimators,
+ meta_classifier=meta_estimator,
+ **options)
+
+ elif params['algo_selection']['estimator_type'] == 'StackingClassifier':
+ ensemble_estimator = StackingClassifier(
+ classifiers=base_estimators,
+ meta_classifier=meta_estimator,
+ **options)
+
+ elif params['algo_selection']['estimator_type'] == 'StackingCVRegressor':
+ ensemble_estimator = StackingCVRegressor(
+ regressors=base_estimators,
+ meta_regressor=meta_estimator,
+ **options)
+
+ else:  # any other estimator_type falls back to StackingRegressor
+ ensemble_estimator = StackingRegressor(
+ regressors=base_estimators,
+ meta_regressor=meta_estimator,
+ **options)
+
+ print(ensemble_estimator)  # echo the assembled ensemble and its base estimators to stdout
+ for base_est in base_estimators:
+ print(base_est)
+
+ with open(output_obj, 'wb') as out_handler:
+ pickle.dump(ensemble_estimator, out_handler, pickle.HIGHEST_PROTOCOL)
+
+ if params['get_params'] and outfile_params:  # optional parameter listing
+ results = get_search_params(ensemble_estimator)
+ df = pd.DataFrame(results, columns=['', 'Parameter', 'Value'])
+ df.to_csv(outfile_params, sep='\t', index=False)  # tab-separated, no index column
+
+
+if __name__ == '__main__':
+ aparser = argparse.ArgumentParser()
+ aparser.add_argument("-b", "--bases", dest="bases")  # base estimator path(s), comma-joined; passed as base_paths
+ aparser.add_argument("-m", "--meta", dest="meta")  # meta estimator path; passed as meta_path
+ aparser.add_argument("-i", "--inputs", dest="inputs")  # Galaxy parameters JSON; passed as inputs_path
+ aparser.add_argument("-o", "--outfile", dest="outfile")  # pickled ensemble output; passed as output_obj
+ aparser.add_argument("-p", "--outfile_params", dest="outfile_params")  # params table output path
+ args = aparser.parse_args()
+
+ main(args.inputs, args.outfile, base_paths=args.bases,
+ meta_path=args.meta, outfile_params=args.outfile_params)
diff -r 39ae276e75d9 -r e94395c672bd test-data/GridSearchCV.zip
Binary file test-data/GridSearchCV.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/LinearRegression01.zip
Binary file test-data/LinearRegression01.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/LinearRegression02.zip
Binary file test-data/LinearRegression02.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/RFE.zip
Binary file test-data/RFE.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/RandomForestClassifier.zip
Binary file test-data/RandomForestClassifier.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/RandomForestRegressor01.zip
Binary file test-data/RandomForestRegressor01.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/StackingCVRegressor01.zip
Binary file test-data/StackingCVRegressor01.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/StackingCVRegressor02.zip
Binary file test-data/StackingCVRegressor02.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/XGBRegressor01.zip
Binary file test-data/XGBRegressor01.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/best_estimator_.zip
Binary file test-data/best_estimator_.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/best_params_.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/best_params_.txt Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,1 @@
+{'estimator__n_estimators': 100}
\ No newline at end of file
diff -r 39ae276e75d9 -r e94395c672bd test-data/best_score_.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/best_score_.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,2 @@
+best_score_
+0.7976348550293088
diff -r 39ae276e75d9 -r e94395c672bd test-data/feature_importances_.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feature_importances_.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,11 @@
+feature_importances_
+0.15959252
+0.20373514
+0.22071308
+0.06281833
+0.098471984
+0.06960951
+0.13073005
+0.027164686
+0.022071308
+0.0050933785
diff -r 39ae276e75d9 -r e94395c672bd test-data/feature_selection_result13
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feature_selection_result13 Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,262 @@
+temp_1 average forecast_noaa friend
+69.0 69.7 65.0 88.0
+59.0 58.1 57.0 66.0
+88.0 77.3 75.0 70.0
+65.0 64.7 63.0 58.0
+50.0 47.5 44.0 58.0
+51.0 48.2 45.0 63.0
+52.0 48.6 45.0 41.0
+78.0 76.7 75.0 66.0
+35.0 45.2 43.0 38.0
+40.0 46.1 45.0 36.0
+47.0 45.3 41.0 58.0
+72.0 76.3 76.0 88.0
+76.0 74.4 73.0 72.0
+39.0 45.3 45.0 46.0
+78.0 72.2 70.0 84.0
+71.0 67.3 63.0 85.0
+48.0 47.7 44.0 61.0
+72.0 77.0 77.0 68.0
+57.0 54.7 50.0 70.0
+40.0 45.1 44.0 39.0
+54.0 47.6 47.0 53.0
+58.0 53.2 52.0 71.0
+68.0 58.6 58.0 54.0
+65.0 55.3 55.0 65.0
+47.0 48.8 46.0 51.0
+44.0 45.6 43.0 42.0
+64.0 67.1 64.0 69.0
+62.0 57.1 57.0 67.0
+66.0 65.7 64.0 74.0
+70.0 71.8 67.0 90.0
+57.0 54.2 54.0 70.0
+50.0 50.5 46.0 57.0
+55.0 51.8 49.0 71.0
+55.0 49.5 46.0 67.0
+42.0 45.2 41.0 47.0
+65.0 60.1 57.0 41.0
+63.0 65.6 63.0 73.0
+48.0 47.3 45.0 28.0
+42.0 46.3 44.0 62.0
+51.0 46.2 45.0 38.0
+64.0 68.0 65.0 64.0
+75.0 74.6 74.0 63.0
+52.0 46.7 42.0 39.0
+67.0 68.6 66.0 80.0
+68.0 68.7 65.0 56.0
+54.0 55.0 53.0 42.0
+62.0 56.8 52.0 70.0
+76.0 76.1 76.0 61.0
+73.0 73.1 71.0 93.0
+52.0 50.3 50.0 35.0
+70.0 73.9 71.0 68.0
+77.0 77.4 75.0 62.0
+60.0 56.6 52.0 72.0
+52.0 53.3 50.0 54.0
+79.0 75.0 71.0 85.0
+76.0 57.2 53.0 74.0
+66.0 66.5 64.0 85.0
+57.0 61.8 58.0 62.0
+66.0 57.4 57.0 60.0
+61.0 58.4 58.0 41.0
+55.0 53.1 52.0 65.0
+48.0 48.1 46.0 54.0
+49.0 49.2 46.0 63.0
+65.0 66.7 64.0 73.0
+60.0 62.5 58.0 56.0
+56.0 53.0 53.0 36.0
+59.0 57.4 56.0 44.0
+44.0 45.7 41.0 35.0
+82.0 63.2 62.0 83.0
+64.0 67.0 65.0 76.0
+43.0 45.5 41.0 46.0
+64.0 55.7 51.0 57.0
+63.0 52.7 49.0 49.0
+70.0 70.6 67.0 79.0
+71.0 52.4 48.0 42.0
+76.0 73.5 69.0 85.0
+68.0 62.1 58.0 55.0
+39.0 45.3 44.0 39.0
+71.0 70.7 70.0 52.0
+69.0 71.7 68.0 89.0
+74.0 71.5 71.0 82.0
+81.0 64.1 62.0 81.0
+51.0 49.3 49.0 34.0
+45.0 46.8 44.0 61.0
+87.0 76.8 73.0 73.0
+71.0 73.8 71.0 86.0
+55.0 60.3 56.0 77.0
+80.0 76.9 72.0 81.0
+67.0 69.0 65.0 76.0
+61.0 61.4 60.0 78.0
+46.0 46.6 43.0 65.0
+39.0 45.1 42.0 51.0
+67.0 68.3 67.0 61.0
+52.0 47.8 43.0 50.0
+67.0 69.8 68.0 87.0
+75.0 71.2 67.0 77.0
+68.0 73.3 73.0 79.0
+92.0 68.2 65.0 71.0
+67.0 72.8 69.0 56.0
+44.0 45.8 43.0 56.0
+61.0 61.0 56.0 73.0
+65.0 53.4 49.0 41.0
+68.0 73.0 72.0 70.0
+87.0 62.1 62.0 69.0
+117.0 54.8 51.0 62.0
+80.0 76.4 75.0 66.0
+57.0 51.0 47.0 46.0
+67.0 63.6 61.0 68.0
+58.0 54.0 51.0 56.0
+65.0 56.2 53.0 41.0
+52.0 48.6 45.0 47.0
+59.0 55.3 52.0 39.0
+57.0 53.9 53.0 35.0
+81.0 59.2 56.0 66.0
+75.0 77.1 76.0 75.0
+76.0 77.4 76.0 95.0
+57.0 64.8 61.0 53.0
+69.0 74.2 72.0 86.0
+77.0 66.8 66.0 64.0
+55.0 49.9 47.0 55.0
+49.0 46.8 45.0 53.0
+54.0 52.7 48.0 57.0
+55.0 51.2 49.0 42.0
+56.0 55.6 53.0 45.0
+68.0 74.6 72.0 77.0
+54.0 53.4 49.0 44.0
+67.0 69.0 69.0 87.0
+49.0 46.9 45.0 33.0
+49.0 49.1 47.0 45.0
+56.0 48.5 48.0 49.0
+73.0 71.0 66.0 78.0
+66.0 66.4 65.0 60.0
+69.0 66.5 66.0 62.0
+82.0 64.5 64.0 65.0
+90.0 76.7 75.0 65.0
+51.0 50.7 49.0 43.0
+77.0 57.1 57.0 41.0
+60.0 61.4 58.0 58.0
+74.0 72.8 71.0 87.0
+85.0 77.2 73.0 74.0
+68.0 62.8 61.0 64.0
+56.0 49.5 46.0 37.0
+71.0 56.2 55.0 45.0
+62.0 59.5 57.0 40.0
+83.0 77.3 76.0 76.0
+64.0 65.4 62.0 56.0
+56.0 48.4 45.0 54.0
+41.0 45.1 42.0 31.0
+65.0 66.2 66.0 67.0
+65.0 53.7 49.0 38.0
+40.0 46.0 46.0 41.0
+45.0 45.6 43.0 29.0
+52.0 48.4 48.0 58.0
+63.0 51.7 50.0 63.0
+52.0 47.6 47.0 44.0
+60.0 57.9 55.0 77.0
+81.0 75.7 73.0 89.0
+75.0 75.8 74.0 77.0
+59.0 51.4 48.0 64.0
+73.0 77.1 77.0 94.0
+75.0 77.3 73.0 66.0
+60.0 58.5 56.0 59.0
+75.0 71.3 68.0 56.0
+59.0 57.6 56.0 40.0
+53.0 49.1 47.0 56.0
+79.0 77.2 76.0 60.0
+57.0 52.1 49.0 46.0
+75.0 67.6 64.0 77.0
+71.0 69.4 67.0 81.0
+53.0 50.2 50.0 42.0
+46.0 48.8 48.0 56.0
+81.0 76.9 72.0 70.0
+49.0 48.9 47.0 29.0
+57.0 48.4 44.0 34.0
+60.0 58.8 54.0 53.0
+67.0 73.7 72.0 64.0
+61.0 64.1 62.0 60.0
+66.0 69.5 66.0 85.0
+64.0 51.9 50.0 55.0
+66.0 65.7 62.0 49.0
+64.0 52.2 52.0 49.0
+71.0 65.2 61.0 56.0
+75.0 63.8 62.0 60.0
+48.0 46.4 46.0 47.0
+53.0 52.5 48.0 70.0
+49.0 47.1 46.0 65.0
+85.0 68.5 67.0 81.0
+62.0 49.4 48.0 30.0
+50.0 47.0 42.0 58.0
+58.0 55.9 51.0 39.0
+72.0 77.2 74.0 95.0
+55.0 50.7 50.0 34.0
+74.0 72.3 70.0 91.0
+85.0 77.3 77.0 77.0
+73.0 77.3 77.0 93.0
+52.0 47.4 44.0 39.0
+67.0 67.6 64.0 62.0
+45.0 45.1 45.0 35.0
+46.0 47.2 46.0 41.0
+66.0 60.6 60.0 57.0
+71.0 77.0 75.0 86.0
+70.0 69.3 66.0 79.0
+58.0 49.9 46.0 53.0
+72.0 77.1 76.0 65.0
+74.0 75.4 74.0 71.0
+65.0 64.5 63.0 49.0
+77.0 58.8 55.0 39.0
+59.0 50.9 49.0 35.0
+45.0 45.7 41.0 61.0
+53.0 50.5 49.0 46.0
+53.0 54.9 54.0 72.0
+79.0 77.3 73.0 79.0
+49.0 49.0 44.0 44.0
+63.0 62.9 62.0 78.0
+69.0 56.5 54.0 45.0
+60.0 50.8 47.0 46.0
+64.0 62.5 60.0 73.0
+79.0 71.0 66.0 64.0
+55.0 47.0 43.0 58.0
+73.0 56.0 54.0 41.0
+60.0 59.1 57.0 62.0
+67.0 70.2 67.0 77.0
+42.0 45.2 45.0 58.0
+60.0 65.0 62.0 55.0
+57.0 49.8 47.0 30.0
+35.0 45.2 44.0 36.0
+75.0 70.3 66.0 84.0
+61.0 51.1 48.0 65.0
+51.0 50.6 46.0 59.0
+71.0 71.9 67.0 70.0
+74.0 75.3 74.0 71.0
+48.0 45.4 44.0 42.0
+74.0 74.9 70.0 60.0
+76.0 70.8 68.0 57.0
+58.0 51.6 47.0 37.0
+51.0 50.4 48.0 43.0
+72.0 72.6 68.0 78.0
+76.0 67.2 64.0 74.0
+52.0 47.9 47.0 60.0
+53.0 48.2 48.0 53.0
+65.0 69.1 65.0 83.0
+58.0 58.1 58.0 43.0
+77.0 75.6 74.0 56.0
+61.0 52.9 51.0 35.0
+67.0 65.3 64.0 54.0
+54.0 49.3 46.0 58.0
+79.0 67.4 65.0 58.0
+77.0 64.3 63.0 67.0
+71.0 67.7 64.0 55.0
+58.0 57.7 54.0 61.0
+68.0 55.9 55.0 56.0
+40.0 45.4 45.0 49.0
+80.0 77.3 75.0 71.0
+74.0 62.3 59.0 61.0
+57.0 45.5 42.0 57.0
+52.0 47.8 43.0 57.0
+71.0 75.1 71.0 95.0
+49.0 53.6 49.0 70.0
+89.0 59.0 59.0 61.0
+60.0 60.2 56.0 78.0
+59.0 58.3 58.0 40.0
diff -r 39ae276e75d9 -r e94395c672bd test-data/final_estimator.zip
Binary file test-data/final_estimator.zip has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,6 @@
+ Parameter Value
+@ copy_X copy_X: True
+@ fit_intercept fit_intercept: True
+* n_jobs n_jobs: 1
+@ normalize normalize: False
+ Note: @, params eligible for search in searchcv tool.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params01.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params01.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,30 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+ with_scaling=True)), ('selectkbest', SelectKBest(k=10, score_func=)), ('svr', SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
+ gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
+ tol=0.001, verbose=False))]"
+@ robustscaler "robustscaler: RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+ with_scaling=True)"
+@ selectkbest selectkbest: SelectKBest(k=10, score_func=)
+@ svr "svr: SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
+ gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
+ tol=0.001, verbose=False)"
+@ robustscaler__copy robustscaler__copy: True
+@ robustscaler__quantile_range robustscaler__quantile_range: (25.0, 75.0)
+@ robustscaler__with_centering robustscaler__with_centering: True
+@ robustscaler__with_scaling robustscaler__with_scaling: True
+@ selectkbest__k selectkbest__k: 10
+@ selectkbest__score_func selectkbest__score_func:
+@ svr__C svr__C: 1.0
+@ svr__cache_size svr__cache_size: 200
+@ svr__coef0 svr__coef0: 0.0
+@ svr__degree svr__degree: 3
+@ svr__epsilon svr__epsilon: 0.1
+@ svr__gamma svr__gamma: 'auto_deprecated'
+@ svr__kernel svr__kernel: 'linear'
+@ svr__max_iter svr__max_iter: -1
+@ svr__shrinking svr__shrinking: True
+@ svr__tol svr__tol: 0.001
+* svr__verbose svr__verbose: False
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params02.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params02.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,33 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+ with_scaling=True)), ('lassocv', LassoCV(alphas=None, copy_X=True, cv='warn', eps=0.001, fit_intercept=True,
+ max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False,
+ precompute='auto', random_state=None, selection='cyclic', tol=0.0001,
+ verbose=False))]"
+@ robustscaler "robustscaler: RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+ with_scaling=True)"
+@ lassocv "lassocv: LassoCV(alphas=None, copy_X=True, cv='warn', eps=0.001, fit_intercept=True,
+ max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False,
+ precompute='auto', random_state=None, selection='cyclic', tol=0.0001,
+ verbose=False)"
+@ robustscaler__copy robustscaler__copy: True
+@ robustscaler__quantile_range robustscaler__quantile_range: (25.0, 75.0)
+@ robustscaler__with_centering robustscaler__with_centering: True
+@ robustscaler__with_scaling robustscaler__with_scaling: True
+@ lassocv__alphas lassocv__alphas: None
+@ lassocv__copy_X lassocv__copy_X: True
+@ lassocv__cv lassocv__cv: 'warn'
+@ lassocv__eps lassocv__eps: 0.001
+@ lassocv__fit_intercept lassocv__fit_intercept: True
+@ lassocv__max_iter lassocv__max_iter: 1000
+@ lassocv__n_alphas lassocv__n_alphas: 100
+* lassocv__n_jobs lassocv__n_jobs: 1
+@ lassocv__normalize lassocv__normalize: False
+@ lassocv__positive lassocv__positive: False
+@ lassocv__precompute lassocv__precompute: 'auto'
+@ lassocv__random_state lassocv__random_state: None
+@ lassocv__selection lassocv__selection: 'cyclic'
+@ lassocv__tol lassocv__tol: 0.0001
+* lassocv__verbose lassocv__verbose: False
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params03.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params03.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,43 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+ with_scaling=True)), ('xgbclassifier', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+ colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+ max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+ n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
+ reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+ silent=True, subsample=1))]"
+@ robustscaler "robustscaler: RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+ with_scaling=True)"
+@ xgbclassifier "xgbclassifier: XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+ colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+ max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+ n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
+ reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+ silent=True, subsample=1)"
+@ robustscaler__copy robustscaler__copy: True
+@ robustscaler__quantile_range robustscaler__quantile_range: (25.0, 75.0)
+@ robustscaler__with_centering robustscaler__with_centering: True
+@ robustscaler__with_scaling robustscaler__with_scaling: True
+@ xgbclassifier__base_score xgbclassifier__base_score: 0.5
+@ xgbclassifier__booster xgbclassifier__booster: 'gbtree'
+@ xgbclassifier__colsample_bylevel xgbclassifier__colsample_bylevel: 1
+@ xgbclassifier__colsample_bytree xgbclassifier__colsample_bytree: 1
+@ xgbclassifier__gamma xgbclassifier__gamma: 0
+@ xgbclassifier__learning_rate xgbclassifier__learning_rate: 0.1
+@ xgbclassifier__max_delta_step xgbclassifier__max_delta_step: 0
+@ xgbclassifier__max_depth xgbclassifier__max_depth: 3
+@ xgbclassifier__min_child_weight xgbclassifier__min_child_weight: 1
+@ xgbclassifier__missing xgbclassifier__missing: nan
+@ xgbclassifier__n_estimators xgbclassifier__n_estimators: 100
+* xgbclassifier__n_jobs xgbclassifier__n_jobs: 1
+* xgbclassifier__nthread xgbclassifier__nthread: None
+@ xgbclassifier__objective xgbclassifier__objective: 'binary:logistic'
+@ xgbclassifier__random_state xgbclassifier__random_state: 0
+@ xgbclassifier__reg_alpha xgbclassifier__reg_alpha: 0
+@ xgbclassifier__reg_lambda xgbclassifier__reg_lambda: 1
+@ xgbclassifier__scale_pos_weight xgbclassifier__scale_pos_weight: 1
+@ xgbclassifier__seed xgbclassifier__seed: None
+@ xgbclassifier__silent xgbclassifier__silent: True
+@ xgbclassifier__subsample xgbclassifier__subsample: 1
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params04.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params04.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,39 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('selectfrommodel', SelectFromModel(estimator=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+ learning_rate=1.0, n_estimators=50, random_state=None),
+ max_features=None, norm_order=1, prefit=False, threshold=None)), ('linearsvc', LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
+ intercept_scaling=1, loss='squared_hinge', max_iter=1000,
+ multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
+ verbose=0))]"
+@ selectfrommodel "selectfrommodel: SelectFromModel(estimator=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+ learning_rate=1.0, n_estimators=50, random_state=None),
+ max_features=None, norm_order=1, prefit=False, threshold=None)"
+@ linearsvc "linearsvc: LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
+ intercept_scaling=1, loss='squared_hinge', max_iter=1000,
+ multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
+ verbose=0)"
+@ selectfrommodel__estimator__algorithm selectfrommodel__estimator__algorithm: 'SAMME.R'
+@ selectfrommodel__estimator__base_estimator selectfrommodel__estimator__base_estimator: None
+@ selectfrommodel__estimator__learning_rate selectfrommodel__estimator__learning_rate: 1.0
+@ selectfrommodel__estimator__n_estimators selectfrommodel__estimator__n_estimators: 50
+@ selectfrommodel__estimator__random_state selectfrommodel__estimator__random_state: None
+@ selectfrommodel__estimator "selectfrommodel__estimator: AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+ learning_rate=1.0, n_estimators=50, random_state=None)"
+@ selectfrommodel__max_features selectfrommodel__max_features: None
+@ selectfrommodel__norm_order selectfrommodel__norm_order: 1
+@ selectfrommodel__prefit selectfrommodel__prefit: False
+@ selectfrommodel__threshold selectfrommodel__threshold: None
+@ linearsvc__C linearsvc__C: 1.0
+@ linearsvc__class_weight linearsvc__class_weight: None
+@ linearsvc__dual linearsvc__dual: True
+@ linearsvc__fit_intercept linearsvc__fit_intercept: True
+@ linearsvc__intercept_scaling linearsvc__intercept_scaling: 1
+@ linearsvc__loss linearsvc__loss: 'squared_hinge'
+@ linearsvc__max_iter linearsvc__max_iter: 1000
+@ linearsvc__multi_class linearsvc__multi_class: 'ovr'
+@ linearsvc__penalty linearsvc__penalty: 'l2'
+@ linearsvc__random_state linearsvc__random_state: None
+@ linearsvc__tol linearsvc__tol: 0.0001
+* linearsvc__verbose linearsvc__verbose: 0
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params05.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params05.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,31 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('randomforestregressor', RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+ max_features='auto', max_leaf_nodes=None,
+ min_impurity_decrease=0.0, min_impurity_split=None,
+ min_samples_leaf=1, min_samples_split=2,
+ min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
+ oob_score=False, random_state=42, verbose=0, warm_start=False))]"
+@ randomforestregressor "randomforestregressor: RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+ max_features='auto', max_leaf_nodes=None,
+ min_impurity_decrease=0.0, min_impurity_split=None,
+ min_samples_leaf=1, min_samples_split=2,
+ min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
+ oob_score=False, random_state=42, verbose=0, warm_start=False)"
+@ randomforestregressor__bootstrap randomforestregressor__bootstrap: True
+@ randomforestregressor__criterion randomforestregressor__criterion: 'mse'
+@ randomforestregressor__max_depth randomforestregressor__max_depth: None
+@ randomforestregressor__max_features randomforestregressor__max_features: 'auto'
+@ randomforestregressor__max_leaf_nodes randomforestregressor__max_leaf_nodes: None
+@ randomforestregressor__min_impurity_decrease randomforestregressor__min_impurity_decrease: 0.0
+@ randomforestregressor__min_impurity_split randomforestregressor__min_impurity_split: None
+@ randomforestregressor__min_samples_leaf randomforestregressor__min_samples_leaf: 1
+@ randomforestregressor__min_samples_split randomforestregressor__min_samples_split: 2
+@ randomforestregressor__min_weight_fraction_leaf randomforestregressor__min_weight_fraction_leaf: 0.0
+@ randomforestregressor__n_estimators randomforestregressor__n_estimators: 100
+* randomforestregressor__n_jobs randomforestregressor__n_jobs: 1
+@ randomforestregressor__oob_score randomforestregressor__oob_score: False
+@ randomforestregressor__random_state randomforestregressor__random_state: 42
+* randomforestregressor__verbose randomforestregressor__verbose: 0
+@ randomforestregressor__warm_start randomforestregressor__warm_start: False
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params06.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params06.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,22 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
+ svd_solver='auto', tol=0.0, whiten=False)), ('adaboostregressor', AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+ n_estimators=50, random_state=None))]"
+@ pca "pca: PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
+ svd_solver='auto', tol=0.0, whiten=False)"
+@ adaboostregressor "adaboostregressor: AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+ n_estimators=50, random_state=None)"
+@ pca__copy pca__copy: True
+@ pca__iterated_power pca__iterated_power: 'auto'
+@ pca__n_components pca__n_components: None
+@ pca__random_state pca__random_state: None
+@ pca__svd_solver pca__svd_solver: 'auto'
+@ pca__tol pca__tol: 0.0
+@ pca__whiten pca__whiten: False
+@ adaboostregressor__base_estimator adaboostregressor__base_estimator: None
+@ adaboostregressor__learning_rate adaboostregressor__learning_rate: 1.0
+@ adaboostregressor__loss adaboostregressor__loss: 'linear'
+@ adaboostregressor__n_estimators adaboostregressor__n_estimators: 50
+@ adaboostregressor__random_state adaboostregressor__random_state: None
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params07.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params07.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,16 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('rbfsampler', RBFSampler(gamma=2.0, n_components=10, random_state=None)), ('adaboostclassifier', AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+ learning_rate=1.0, n_estimators=50, random_state=None))]"
+@ rbfsampler rbfsampler: RBFSampler(gamma=2.0, n_components=10, random_state=None)
+@ adaboostclassifier "adaboostclassifier: AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+ learning_rate=1.0, n_estimators=50, random_state=None)"
+@ rbfsampler__gamma rbfsampler__gamma: 2.0
+@ rbfsampler__n_components rbfsampler__n_components: 10
+@ rbfsampler__random_state rbfsampler__random_state: None
+@ adaboostclassifier__algorithm adaboostclassifier__algorithm: 'SAMME.R'
+@ adaboostclassifier__base_estimator adaboostclassifier__base_estimator: None
+@ adaboostclassifier__learning_rate adaboostclassifier__learning_rate: 1.0
+@ adaboostclassifier__n_estimators adaboostclassifier__n_estimators: 50
+@ adaboostclassifier__random_state adaboostclassifier__random_state: None
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params08.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params08.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,24 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('featureagglomeration', FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
+ connectivity=None, linkage='ward', memory=None, n_clusters=3,
+ pooling_func=)), ('adaboostclassifier', AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+ learning_rate=1.0, n_estimators=50, random_state=None))]"
+@ featureagglomeration "featureagglomeration: FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
+ connectivity=None, linkage='ward', memory=None, n_clusters=3,
+ pooling_func=)"
+@ adaboostclassifier "adaboostclassifier: AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+ learning_rate=1.0, n_estimators=50, random_state=None)"
+@ featureagglomeration__affinity featureagglomeration__affinity: 'euclidean'
+@ featureagglomeration__compute_full_tree featureagglomeration__compute_full_tree: 'auto'
+@ featureagglomeration__connectivity featureagglomeration__connectivity: None
+@ featureagglomeration__linkage featureagglomeration__linkage: 'ward'
+* featureagglomeration__memory featureagglomeration__memory: None
+@ featureagglomeration__n_clusters featureagglomeration__n_clusters: 3
+@ featureagglomeration__pooling_func featureagglomeration__pooling_func:
+@ adaboostclassifier__algorithm adaboostclassifier__algorithm: 'SAMME.R'
+@ adaboostclassifier__base_estimator adaboostclassifier__base_estimator: None
+@ adaboostclassifier__learning_rate adaboostclassifier__learning_rate: 1.0
+@ adaboostclassifier__n_estimators adaboostclassifier__n_estimators: 50
+@ adaboostclassifier__random_state adaboostclassifier__random_state: None
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params09.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params09.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,39 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('relieff', ReliefF(discrete_threshold=10, n_features_to_select=3, n_jobs=1,
+ n_neighbors=100, verbose=False)), ('randomforestregressor', RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+ max_features='auto', max_leaf_nodes=None,
+ min_impurity_decrease=0.0, min_impurity_split=None,
+ min_samples_leaf=1, min_samples_split=2,
+ min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+ oob_score=False, random_state=None, verbose=0, warm_start=False))]"
+@ relieff "relieff: ReliefF(discrete_threshold=10, n_features_to_select=3, n_jobs=1,
+ n_neighbors=100, verbose=False)"
+@ randomforestregressor "randomforestregressor: RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+ max_features='auto', max_leaf_nodes=None,
+ min_impurity_decrease=0.0, min_impurity_split=None,
+ min_samples_leaf=1, min_samples_split=2,
+ min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+ oob_score=False, random_state=None, verbose=0, warm_start=False)"
+@ relieff__discrete_threshold relieff__discrete_threshold: 10
+@ relieff__n_features_to_select relieff__n_features_to_select: 3
+* relieff__n_jobs relieff__n_jobs: 1
+@ relieff__n_neighbors relieff__n_neighbors: 100
+* relieff__verbose relieff__verbose: False
+@ randomforestregressor__bootstrap randomforestregressor__bootstrap: True
+@ randomforestregressor__criterion randomforestregressor__criterion: 'mse'
+@ randomforestregressor__max_depth randomforestregressor__max_depth: None
+@ randomforestregressor__max_features randomforestregressor__max_features: 'auto'
+@ randomforestregressor__max_leaf_nodes randomforestregressor__max_leaf_nodes: None
+@ randomforestregressor__min_impurity_decrease randomforestregressor__min_impurity_decrease: 0.0
+@ randomforestregressor__min_impurity_split randomforestregressor__min_impurity_split: None
+@ randomforestregressor__min_samples_leaf randomforestregressor__min_samples_leaf: 1
+@ randomforestregressor__min_samples_split randomforestregressor__min_samples_split: 2
+@ randomforestregressor__min_weight_fraction_leaf randomforestregressor__min_weight_fraction_leaf: 0.0
+@ randomforestregressor__n_estimators randomforestregressor__n_estimators: 'warn'
+* randomforestregressor__n_jobs randomforestregressor__n_jobs: 1
+@ randomforestregressor__oob_score randomforestregressor__oob_score: False
+@ randomforestregressor__random_state randomforestregressor__random_state: None
+* randomforestregressor__verbose randomforestregressor__verbose: 0
+@ randomforestregressor__warm_start randomforestregressor__warm_start: False
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params10.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params10.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,12 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('adaboostregressor', AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+ n_estimators=50, random_state=None))]"
+@ adaboostregressor "adaboostregressor: AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+ n_estimators=50, random_state=None)"
+@ adaboostregressor__base_estimator adaboostregressor__base_estimator: None
+@ adaboostregressor__learning_rate adaboostregressor__learning_rate: 1.0
+@ adaboostregressor__loss adaboostregressor__loss: 'linear'
+@ adaboostregressor__n_estimators adaboostregressor__n_estimators: 50
+@ adaboostregressor__random_state adaboostregressor__random_state: None
+ Note: @, params eligible for search in searchcv tool.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params11.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params11.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,46 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('editednearestneighbours', EditedNearestNeighbours(kind_sel='all', n_jobs=1, n_neighbors=3,
+ random_state=None, ratio=None, return_indices=False,
+ sampling_strategy='auto')), ('randomforestclassifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
+ max_depth=None, max_features='auto', max_leaf_nodes=None,
+ min_impurity_decrease=0.0, min_impurity_split=None,
+ min_samples_leaf=1, min_samples_split=2,
+ min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+ oob_score=False, random_state=None, verbose=0,
+ warm_start=False))]"
+@ editednearestneighbours "editednearestneighbours: EditedNearestNeighbours(kind_sel='all', n_jobs=1, n_neighbors=3,
+ random_state=None, ratio=None, return_indices=False,
+ sampling_strategy='auto')"
+@ randomforestclassifier "randomforestclassifier: RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
+ max_depth=None, max_features='auto', max_leaf_nodes=None,
+ min_impurity_decrease=0.0, min_impurity_split=None,
+ min_samples_leaf=1, min_samples_split=2,
+ min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+ oob_score=False, random_state=None, verbose=0,
+ warm_start=False)"
+@ editednearestneighbours__kind_sel editednearestneighbours__kind_sel: 'all'
+* editednearestneighbours__n_jobs editednearestneighbours__n_jobs: 1
+@ editednearestneighbours__n_neighbors editednearestneighbours__n_neighbors: 3
+@ editednearestneighbours__random_state editednearestneighbours__random_state: None
+@ editednearestneighbours__ratio editednearestneighbours__ratio: None
+@ editednearestneighbours__return_indices editednearestneighbours__return_indices: False
+@ editednearestneighbours__sampling_strategy editednearestneighbours__sampling_strategy: 'auto'
+@ randomforestclassifier__bootstrap randomforestclassifier__bootstrap: True
+@ randomforestclassifier__class_weight randomforestclassifier__class_weight: None
+@ randomforestclassifier__criterion randomforestclassifier__criterion: 'gini'
+@ randomforestclassifier__max_depth randomforestclassifier__max_depth: None
+@ randomforestclassifier__max_features randomforestclassifier__max_features: 'auto'
+@ randomforestclassifier__max_leaf_nodes randomforestclassifier__max_leaf_nodes: None
+@ randomforestclassifier__min_impurity_decrease randomforestclassifier__min_impurity_decrease: 0.0
+@ randomforestclassifier__min_impurity_split randomforestclassifier__min_impurity_split: None
+@ randomforestclassifier__min_samples_leaf randomforestclassifier__min_samples_leaf: 1
+@ randomforestclassifier__min_samples_split randomforestclassifier__min_samples_split: 2
+@ randomforestclassifier__min_weight_fraction_leaf randomforestclassifier__min_weight_fraction_leaf: 0.0
+@ randomforestclassifier__n_estimators randomforestclassifier__n_estimators: 'warn'
+* randomforestclassifier__n_jobs randomforestclassifier__n_jobs: 1
+@ randomforestclassifier__oob_score randomforestclassifier__oob_score: False
+@ randomforestclassifier__random_state randomforestclassifier__random_state: None
+* randomforestclassifier__verbose randomforestclassifier__verbose: 0
+@ randomforestclassifier__warm_start randomforestclassifier__warm_start: False
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/get_params12.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params12.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,47 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('rfe', RFE(estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+ colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+ max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+ n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
+ reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+ silent=True, subsample=1),
+ n_features_to_select=None, step=1, verbose=0))]"
+@ rfe "rfe: RFE(estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+ colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+ max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+ n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
+ reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+ silent=True, subsample=1),
+ n_features_to_select=None, step=1, verbose=0)"
+@ rfe__estimator__base_score rfe__estimator__base_score: 0.5
+@ rfe__estimator__booster rfe__estimator__booster: 'gbtree'
+@ rfe__estimator__colsample_bylevel rfe__estimator__colsample_bylevel: 1
+@ rfe__estimator__colsample_bytree rfe__estimator__colsample_bytree: 1
+@ rfe__estimator__gamma rfe__estimator__gamma: 0
+@ rfe__estimator__learning_rate rfe__estimator__learning_rate: 0.1
+@ rfe__estimator__max_delta_step rfe__estimator__max_delta_step: 0
+@ rfe__estimator__max_depth rfe__estimator__max_depth: 3
+@ rfe__estimator__min_child_weight rfe__estimator__min_child_weight: 1
+@ rfe__estimator__missing rfe__estimator__missing: nan
+@ rfe__estimator__n_estimators rfe__estimator__n_estimators: 100
+* rfe__estimator__n_jobs rfe__estimator__n_jobs: 1
+* rfe__estimator__nthread rfe__estimator__nthread: None
+@ rfe__estimator__objective rfe__estimator__objective: 'reg:linear'
+@ rfe__estimator__random_state rfe__estimator__random_state: 0
+@ rfe__estimator__reg_alpha rfe__estimator__reg_alpha: 0
+@ rfe__estimator__reg_lambda rfe__estimator__reg_lambda: 1
+@ rfe__estimator__scale_pos_weight rfe__estimator__scale_pos_weight: 1
+@ rfe__estimator__seed rfe__estimator__seed: None
+@ rfe__estimator__silent rfe__estimator__silent: True
+@ rfe__estimator__subsample rfe__estimator__subsample: 1
+@ rfe__estimator "rfe__estimator: XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+ colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+ max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+ n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
+ reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+ silent=True, subsample=1)"
+@ rfe__n_features_to_select rfe__n_features_to_select: None
+@ rfe__step rfe__step: 1
+* rfe__verbose rfe__verbose: 0
+ Note: @, searchable params in searchcv too.
diff -r 39ae276e75d9 -r e94395c672bd test-data/mv_result01.tabular
--- a/test-data/mv_result01.tabular Sun Dec 30 01:56:11 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-0.9452947345848994
-0.9926363525448115
--0.4384003222944141
diff -r 39ae276e75d9 -r e94395c672bd test-data/mv_result02.tabular
--- a/test-data/mv_result02.tabular Sun Dec 30 01:56:11 2018 -0500
+++ b/test-data/mv_result02.tabular Tue May 14 18:15:12 2019 -0400
@@ -1,10 +1,11 @@
-1.6957921248350636
--0.9248588846061156
--0.48640795813792376
-0.647707440306449
-0.32740690920811427
--0.8229559569886034
-1.2150108977866847
-0.14723254190255275
-0.6053186541119763
-0.3972102859168325
+Predicted
+1.578912095858962
+-1.199072894940544
+-0.7173258906076226
+0.3255908318822695
+0.21919344304093213
+-0.6841926371423699
+1.1144698671662865
+0.19379531649046616
+0.9405094785593062
+1.2581284896870837
diff -r 39ae276e75d9 -r e94395c672bd test-data/mv_result03.tabular
--- a/test-data/mv_result03.tabular Sun Dec 30 01:56:11 2018 -0500
+++ b/test-data/mv_result03.tabular Tue May 14 18:15:12 2019 -0400
@@ -1,3 +1,6 @@
-0.9452947345848994
-0.9926363525448115
--0.4384003222944141
+train_sizes_abs mean_train_scores std_train_scores mean_test_scores std_test_scores
+17 0.9668700841937653 0.00277836829836518 0.7008862995946905 0.03857541198731935
+56 0.9730008602419361 0.006839342612121988 0.7963376762427242 0.004846330083938778
+95 0.9728783377589098 0.0037790183626530663 0.814592845745573 0.020457691766770824
+134 0.9739086338111185 0.001627343246847077 0.7985540571195479 0.03954641079310707
+174 0.9726218628287785 0.0032867750457225182 0.8152971572131146 0.04280261115004303
diff -r 39ae276e75d9 -r e94395c672bd test-data/mv_result04.tabular
--- a/test-data/mv_result04.tabular Sun Dec 30 01:56:11 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-17
-56
-95
-134
-174
diff -r 39ae276e75d9 -r e94395c672bd test-data/mv_result05.tabular
--- a/test-data/mv_result05.tabular Sun Dec 30 01:56:11 2018 -0500
+++ b/test-data/mv_result05.tabular Tue May 14 18:15:12 2019 -0400
@@ -1,1 +1,262 @@
-0.4998435882784322
+Predicted
+70.16
+62.06
+83.04
+62.84
+48.63
+51.25
+54.98
+80.3
+42.84
+41.52
+43.83
+73.15
+74.22
+42.88
+74.93
+72.9
+53.74
+78.86
+59.0
+40.28
+54.52
+58.34
+62.74
+62.35
+49.15
+41.92
+65.59
+59.91
+66.49
+72.08
+60.44
+53.84
+54.82
+52.66
+42.37
+61.3
+63.14
+50.62
+42.75
+47.39
+67.8
+73.58
+49.97
+67.04
+67.45
+54.67
+64.87
+77.23
+73.52
+53.55
+70.53
+77.98
+61.99
+53.08
+78.12
+66.55
+63.95
+60.57
+61.6
+60.37
+55.29
+54.31
+52.54
+65.31
+61.51
+57.3
+60.02
+43.64
+74.78
+68.26
+42.72
+61.26
+61.25
+71.58
+61.03
+70.53
+70.25
+43.4
+71.39
+72.31
+72.7
+72.11
+53.55
+43.4
+80.6
+73.72
+58.86
+76.71
+68.36
+60.26
+48.56
+38.96
+69.67
+52.9
+67.63
+75.12
+70.92
+70.89
+67.05
+43.89
+59.94
+62.98
+71.1
+79.22
+77.31
+79.06
+61.11
+66.32
+54.7
+61.1
+54.59
+58.7
+59.6
+73.79
+72.69
+81.83
+61.08
+69.21
+74.8
+54.37
+50.85
+53.07
+58.53
+55.44
+72.62
+54.14
+68.12
+48.81
+50.11
+56.06
+73.63
+63.29
+71.0
+74.87
+81.24
+54.67
+66.96
+61.37
+74.84
+76.71
+69.27
+56.53
+71.91
+58.74
+77.83
+64.57
+51.93
+42.84
+64.11
+59.47
+42.46
+43.79
+51.75
+63.98
+54.71
+64.95
+79.72
+72.12
+60.66
+79.3
+71.26
+59.9
+74.25
+59.68
+52.37
+78.52
+58.52
+71.98
+71.77
+54.48
+48.96
+81.42
+54.08
+53.52
+64.38
+70.79
+63.95
+67.48
+61.76
+66.15
+62.1
+75.68
+69.72
+43.8
+56.27
+53.38
+81.31
+57.54
+48.15
+59.47
+78.01
+56.39
+72.33
+78.8
+78.66
+52.01
+66.68
+48.56
+47.75
+65.67
+77.93
+72.68
+58.0
+77.83
+73.37
+65.39
+69.79
+55.98
+46.35
+54.31
+55.58
+79.69
+52.76
+62.62
+66.54
+60.29
+62.57
+74.86
+48.05
+65.09
+65.02
+67.84
+41.86
+62.28
+57.05
+43.68
+72.0
+63.04
+54.41
+73.37
+75.11
+42.65
+73.16
+71.68
+58.61
+53.54
+73.33
+72.16
+49.96
+54.78
+64.24
+60.13
+76.46
+61.53
+68.36
+53.1
+71.33
+76.12
+70.86
+61.35
+67.12
+43.25
+80.2
+71.16
+58.63
+52.37
+74.93
+53.34
+76.41
+63.87
+59.97
diff -r 39ae276e75d9 -r e94395c672bd test-data/mv_result06.tabular
--- a/test-data/mv_result06.tabular Sun Dec 30 01:56:11 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-0.07547169811320754 0.10344827586206896 0.10294117647058823
-0.07547169811320754 0.10344827586206896 0.10294117647058823
-0.07547169811320754 0.10344827586206896 0.10294117647058823
-0.07547169811320754 0.10344827586206896 0.10294117647058823
-0.07547169811320754 0.10344827586206896 0.10294117647058823
diff -r 39ae276e75d9 -r e94395c672bd test-data/named_steps.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/named_steps.txt Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,6 @@
+{'preprocessing_1': SelectKBest(k=10, score_func=), 'estimator': XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+ colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+ max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+ n_jobs=1, nthread=None, objective='reg:linear', random_state=10,
+ reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+ silent=True, subsample=1)}
\ No newline at end of file
diff -r 39ae276e75d9 -r e94395c672bd test-data/nn_model01
Binary file test-data/nn_model01 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline01
Binary file test-data/pipeline01 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline02
Binary file test-data/pipeline02 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline03
Binary file test-data/pipeline03 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline04
Binary file test-data/pipeline04 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline05
Binary file test-data/pipeline05 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline06
Binary file test-data/pipeline06 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline07
Binary file test-data/pipeline07 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline08
Binary file test-data/pipeline08 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline09
Binary file test-data/pipeline09 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline10
Binary file test-data/pipeline10 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline11
Binary file test-data/pipeline11 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline12
Binary file test-data/pipeline12 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline13
Binary file test-data/pipeline13 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline14
Binary file test-data/pipeline14 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/pipeline15
Binary file test-data/pipeline15 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/ranking_.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ranking_.tabular Tue May 14 18:15:12 2019 -0400
@@ -0,0 +1,18 @@
+ranking_
+17
+7
+4
+5
+2
+1
+9
+6
+8
+3
+10
+15
+14
+11
+13
+12
+16
diff -r 39ae276e75d9 -r e94395c672bd test-data/searchCV01
Binary file test-data/searchCV01 has changed
diff -r 39ae276e75d9 -r e94395c672bd test-data/searchCV02
Binary file test-data/searchCV02 has changed
diff -r 39ae276e75d9 -r e94395c672bd utils.py
--- a/utils.py Sun Dec 30 01:56:11 2018 -0500
+++ b/utils.py Tue May 14 18:15:12 2019 -0400
@@ -1,80 +1,134 @@
+import ast
import json
+import imblearn
import numpy as np
-import os
import pandas
import pickle
import re
import scipy
import sklearn
+import skrebate
import sys
import warnings
import xgboost
+from collections import Counter
from asteval import Interpreter, make_symbol_table
-from sklearn import (cluster, compose, decomposition, ensemble, feature_extraction,
- feature_selection, gaussian_process, kernel_approximation, metrics,
- model_selection, naive_bayes, neighbors, pipeline, preprocessing,
- svm, linear_model, tree, discriminant_analysis)
+from imblearn import under_sampling, over_sampling, combine
+from imblearn.pipeline import Pipeline as imbPipeline
+from mlxtend import regressor, classifier
+from scipy.io import mmread
+from sklearn import (
+ cluster, compose, decomposition, ensemble, feature_extraction,
+ feature_selection, gaussian_process, kernel_approximation, metrics,
+ model_selection, naive_bayes, neighbors, pipeline, preprocessing,
+ svm, linear_model, tree, discriminant_analysis)
+
+try:
+ import iraps_classifier
+except ImportError:
+ pass
try:
- import skrebate
-except ModuleNotFoundError:
+ import model_validations
+except ImportError:
+ pass
+
+try:
+ import feature_selectors
+except ImportError:
pass
-
-N_JOBS = int(os.environ.get('GALAXY_SLOTS', 1))
+try:
+ import preprocessors
+except ImportError:
+ pass
-try:
- sk_whitelist
-except NameError:
- sk_whitelist = None
+# handle pickle white list file
+WL_FILE = __import__('os').path.join(
+ __import__('os').path.dirname(__file__), 'pk_whitelist.json')
+
+N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
-class SafePickler(pickle.Unpickler):
+class _SafePickler(pickle.Unpickler, object):
"""
- Used to safely deserialize scikit-learn model objects serialized by cPickle.dump
+ Used to safely deserialize scikit-learn model objects
Usage:
- eg.: SafePickler.load(pickled_file_object)
+ eg.: _SafePickler.load(pickled_file_object)
"""
- def find_class(self, module, name):
+ def __init__(self, file):
+ super(_SafePickler, self).__init__(file)
+ # load global white list
+ with open(WL_FILE, 'r') as f:
+ self.pk_whitelist = json.load(f)
- # sk_whitelist could be read from tool
- global sk_whitelist
- if not sk_whitelist:
- whitelist_file = os.path.join(os.path.dirname(__file__), 'sk_whitelist.json')
- with open(whitelist_file, 'r') as f:
- sk_whitelist = json.load(f)
+ self.bad_names = (
+ 'and', 'as', 'assert', 'break', 'class', 'continue',
+ 'def', 'del', 'elif', 'else', 'except', 'exec',
+ 'finally', 'for', 'from', 'global', 'if', 'import',
+ 'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
+ 'raise', 'return', 'try', 'system', 'while', 'with',
+ 'True', 'False', 'None', 'eval', 'execfile', '__import__',
+ '__package__', '__subclasses__', '__bases__', '__globals__',
+ '__code__', '__closure__', '__func__', '__self__', '__module__',
+ '__dict__', '__class__', '__call__', '__get__',
+ '__getattribute__', '__subclasshook__', '__new__',
+ '__init__', 'func_globals', 'func_code', 'func_closure',
+ 'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame',
+ '__asteval__', 'f_locals', '__mro__')
- bad_names = ('and', 'as', 'assert', 'break', 'class', 'continue',
- 'def', 'del', 'elif', 'else', 'except', 'exec',
- 'finally', 'for', 'from', 'global', 'if', 'import',
- 'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
- 'raise', 'return', 'try', 'system', 'while', 'with',
- 'True', 'False', 'None', 'eval', 'execfile', '__import__',
- '__package__', '__subclasses__', '__bases__', '__globals__',
- '__code__', '__closure__', '__func__', '__self__', '__module__',
- '__dict__', '__class__', '__call__', '__get__',
- '__getattribute__', '__subclasshook__', '__new__',
- '__init__', 'func_globals', 'func_code', 'func_closure',
- 'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame',
- '__asteval__', 'f_locals', '__mro__')
- good_names = ['copy_reg._reconstructor', '__builtin__.object']
+ # unclassified good globals
+ self.good_names = [
+ 'copy_reg._reconstructor', '__builtin__.object',
+ '__builtin__.bytearray', 'builtins.object',
+ 'builtins.bytearray', 'keras.engine.sequential.Sequential',
+ 'keras.engine.sequential.Model']
+
+ # custom module in Galaxy-ML
+ self.custom_modules = [
+ '__main__', 'keras_galaxy_models', 'feature_selectors',
+ 'preprocessors', 'iraps_classifier', 'model_validations']
+ # override
+ def find_class(self, module, name):
+ # blacklist first
+ if name in self.bad_names:
+ raise pickle.UnpicklingError("global '%s.%s' is forbidden"
+ % (module, name))
+
+ # custom module in Galaxy-ML
+ if module in self.custom_modules:
+ cutom_module = sys.modules.get(module, None)
+ if cutom_module:
+ return getattr(cutom_module, name)
+ else:
+ raise pickle.UnpicklingError("Module %s' is not imported"
+ % module)
+
+ # For objects from outside libraries, it's necessary to verify
+ # both module and name. Currently only a blacklist checker
+ # is working.
+ # TODO: replace with a whitelist checker.
+ good_names = self.good_names
+ pk_whitelist = self.pk_whitelist
if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
fullname = module + '.' + name
if (fullname in good_names)\
- or ( ( module.startswith('sklearn.')
- or module.startswith('xgboost.')
- or module.startswith('skrebate.')
- or module.startswith('imblearn')
- or module.startswith('numpy.')
- or module == 'numpy'
- )
- and (name not in bad_names)
- ):
- # TODO: replace with a whitelist checker
- if fullname not in sk_whitelist['SK_NAMES'] + sk_whitelist['SKR_NAMES'] + sk_whitelist['XGB_NAMES'] + sk_whitelist['NUMPY_NAMES'] + sk_whitelist['IMBLEARN_NAMES'] + good_names:
- print("Warning: global %s is not in pickler whitelist yet and will loss support soon. Contact tool author or leave a message at github.com" % fullname)
+ or (module.startswith(('sklearn.', 'xgboost.', 'skrebate.',
+ 'imblearn.', 'mlxtend.', 'numpy.'))
+ or module == 'numpy'):
+ if fullname not in (pk_whitelist['SK_NAMES'] +
+ pk_whitelist['SKR_NAMES'] +
+ pk_whitelist['XGB_NAMES'] +
+ pk_whitelist['NUMPY_NAMES'] +
+ pk_whitelist['IMBLEARN_NAMES'] +
+ pk_whitelist['MLXTEND_NAMES'] +
+ good_names):
+ # raise pickle.UnpicklingError
+ print("Warning: global %s is not in pickler whitelist "
+ "yet and will loss support soon. Contact tool "
+ "author or leave a message at github.com" % fullname)
mod = sys.modules[module]
return getattr(mod, name)
@@ -82,10 +136,15 @@
def load_model(file):
- return SafePickler(file).load()
+ """Load pickled object with `_SafePickler`
+ """
+ return _SafePickler(file).load()
-def read_columns(f, c=None, c_option='by_index_number', return_df=False, **args):
+def read_columns(f, c=None, c_option='by_index_number',
+ return_df=False, **args):
+ """Return array from a tabular dataset by various columns selection
+ """
data = pandas.read_csv(f, **args)
if c_option == 'by_index_number':
cols = list(map(lambda x: x - 1, c))
@@ -106,10 +165,21 @@
return y
-## generate an instance for one of sklearn.feature_selection classes
-def feature_selector(inputs):
+def feature_selector(inputs, X=None, y=None):
+ """generate an instance of sklearn.feature_selection classes
+
+ Parameters
+ ----------
+ inputs : dict
+ From galaxy tool parameters.
+ X : array
+ Containing training features.
+ y : array or list
+ Target values.
+ """
selector = inputs['selected_algorithm']
- selector = getattr(sklearn.feature_selection, selector)
+ if selector != 'DyRFECV':
+ selector = getattr(sklearn.feature_selection, selector)
options = inputs['options']
if inputs['selected_algorithm'] == 'SelectFromModel':
@@ -128,27 +198,60 @@
else:
estimator_json = inputs['model_inputter']['estimator_selector']
estimator = get_estimator(estimator_json)
+ check_feature_importances = try_get_attr(
+ 'feature_selectors', 'check_feature_importances')
+ estimator = check_feature_importances(estimator)
new_selector = selector(estimator, **options)
elif inputs['selected_algorithm'] == 'RFE':
- estimator = get_estimator(inputs['estimator_selector'])
step = options.get('step', None)
if step and step >= 1.0:
options['step'] = int(step)
+ estimator = get_estimator(inputs["estimator_selector"])
+ check_feature_importances = try_get_attr(
+ 'feature_selectors', 'check_feature_importances')
+ estimator = check_feature_importances(estimator)
new_selector = selector(estimator, **options)
elif inputs['selected_algorithm'] == 'RFECV':
options['scoring'] = get_scoring(options['scoring'])
options['n_jobs'] = N_JOBS
splitter, groups = get_cv(options.pop('cv_selector'))
- # TODO support group cv splitters
- options['cv'] = splitter
+ if groups is None:
+ options['cv'] = splitter
+ else:
+ options['cv'] = list(splitter.split(X, y, groups=groups))
step = options.get('step', None)
if step and step >= 1.0:
options['step'] = int(step)
estimator = get_estimator(inputs['estimator_selector'])
+ check_feature_importances = try_get_attr(
+ 'feature_selectors', 'check_feature_importances')
+ estimator = check_feature_importances(estimator)
new_selector = selector(estimator, **options)
+ elif inputs['selected_algorithm'] == 'DyRFECV':
+ options['scoring'] = get_scoring(options['scoring'])
+ options['n_jobs'] = N_JOBS
+ splitter, groups = get_cv(options.pop('cv_selector'))
+ if groups is None:
+ options['cv'] = splitter
+ else:
+ options['cv'] = list(splitter.split(X, y, groups=groups))
+ step = options.get('step')
+ if not step or step == 'None':
+ step = None
+ else:
+ step = ast.literal_eval(step)
+ options['step'] = step
+ estimator = get_estimator(inputs["estimator_selector"])
+ check_feature_importances = try_get_attr(
+ 'feature_selectors', 'check_feature_importances')
+ estimator = check_feature_importances(estimator)
+ DyRFECV = try_get_attr('feature_selectors', 'DyRFECV')
+
+ new_selector = DyRFECV(estimator, **options)
+
elif inputs['selected_algorithm'] == 'VarianceThreshold':
new_selector = selector(**options)
@@ -161,12 +264,20 @@
def get_X_y(params, file1, file2):
- input_type = params['selected_tasks']['selected_algorithms']['input_options']['selected_input']
+ """Return machine learning inputs X, y from tabular inputs
+ """
+ input_type = (params['selected_tasks']['selected_algorithms']
+ ['input_options']['selected_input'])
if input_type == 'tabular':
- header = 'infer' if params['selected_tasks']['selected_algorithms']['input_options']['header1'] else None
- column_option = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_1']['selected_column_selector_option']
- if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
- c = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_1']['col1']
+ header = 'infer' if (params['selected_tasks']['selected_algorithms']
+ ['input_options']['header1']) else None
+ column_option = (params['selected_tasks']['selected_algorithms']
+ ['input_options']['column_selector_options_1']
+ ['selected_column_selector_option'])
+ if column_option in ['by_index_number', 'all_but_by_index_number',
+ 'by_header_name', 'all_but_by_header_name']:
+ c = (params['selected_tasks']['selected_algorithms']
+ ['input_options']['column_selector_options_1']['col1'])
else:
c = None
X = read_columns(
@@ -175,15 +286,19 @@
c_option=column_option,
sep='\t',
header=header,
- parse_dates=True
- )
+ parse_dates=True).astype(float)
else:
X = mmread(file1)
- header = 'infer' if params['selected_tasks']['selected_algorithms']['input_options']['header2'] else None
- column_option = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_2']['selected_column_selector_option2']
- if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
- c = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_2']['col2']
+ header = 'infer' if (params['selected_tasks']['selected_algorithms']
+ ['input_options']['header2']) else None
+ column_option = (params['selected_tasks']['selected_algorithms']
+ ['input_options']['column_selector_options_2']
+ ['selected_column_selector_option2'])
+ if column_option in ['by_index_number', 'all_but_by_index_number',
+ 'by_header_name', 'all_but_by_header_name']:
+ c = (params['selected_tasks']['selected_algorithms']
+ ['input_options']['column_selector_options_2']['col2'])
else:
c = None
y = read_columns(
@@ -192,15 +307,17 @@
c_option=column_option,
sep='\t',
header=header,
- parse_dates=True
- )
+ parse_dates=True)
y = y.ravel()
+
return X, y
class SafeEval(Interpreter):
-
- def __init__(self, load_scipy=False, load_numpy=False, load_estimators=False):
+ """Customized symbol table for safe literal eval
+ """
+ def __init__(self, load_scipy=False, load_numpy=False,
+ load_estimators=False):
# File opening and other unneeded functions could be dropped
unwanted = ['open', 'type', 'dir', 'id', 'str', 'repr']
@@ -208,7 +325,8 @@
# Allowed symbol table. Add more if needed.
new_syms = {
'np_arange': getattr(np, 'arange'),
- 'ensemble_ExtraTreesClassifier': getattr(ensemble, 'ExtraTreesClassifier')
+ 'ensemble_ExtraTreesClassifier':
+ getattr(ensemble, 'ExtraTreesClassifier')
}
syms = make_symbol_table(use_numpy=False, **new_syms)
@@ -216,80 +334,109 @@
if load_scipy:
scipy_distributions = scipy.stats.distributions.__dict__
for k, v in scipy_distributions.items():
- if isinstance(v, (scipy.stats.rv_continuous, scipy.stats.rv_discrete)):
+ if isinstance(v, (scipy.stats.rv_continuous,
+ scipy.stats.rv_discrete)):
syms['scipy_stats_' + k] = v
if load_numpy:
- from_numpy_random = ['beta', 'binomial', 'bytes', 'chisquare', 'choice', 'dirichlet', 'division',
- 'exponential', 'f', 'gamma', 'geometric', 'gumbel', 'hypergeometric',
- 'laplace', 'logistic', 'lognormal', 'logseries', 'mtrand', 'multinomial',
- 'multivariate_normal', 'negative_binomial', 'noncentral_chisquare', 'noncentral_f',
- 'normal', 'pareto', 'permutation', 'poisson', 'power', 'rand', 'randint',
- 'randn', 'random', 'random_integers', 'random_sample', 'ranf', 'rayleigh',
- 'sample', 'seed', 'set_state', 'shuffle', 'standard_cauchy', 'standard_exponential',
- 'standard_gamma', 'standard_normal', 'standard_t', 'triangular', 'uniform',
- 'vonmises', 'wald', 'weibull', 'zipf']
+ from_numpy_random = [
+ 'beta', 'binomial', 'bytes', 'chisquare', 'choice',
+ 'dirichlet', 'division', 'exponential', 'f', 'gamma',
+ 'geometric', 'gumbel', 'hypergeometric', 'laplace',
+ 'logistic', 'lognormal', 'logseries', 'mtrand',
+ 'multinomial', 'multivariate_normal', 'negative_binomial',
+ 'noncentral_chisquare', 'noncentral_f', 'normal', 'pareto',
+ 'permutation', 'poisson', 'power', 'rand', 'randint',
+ 'randn', 'random', 'random_integers', 'random_sample',
+ 'ranf', 'rayleigh', 'sample', 'seed', 'set_state',
+ 'shuffle', 'standard_cauchy', 'standard_exponential',
+ 'standard_gamma', 'standard_normal', 'standard_t',
+ 'triangular', 'uniform', 'vonmises', 'wald', 'weibull', 'zipf']
for f in from_numpy_random:
syms['np_random_' + f] = getattr(np.random, f)
if load_estimators:
estimator_table = {
- 'sklearn_svm' : getattr(sklearn, 'svm'),
- 'sklearn_tree' : getattr(sklearn, 'tree'),
- 'sklearn_ensemble' : getattr(sklearn, 'ensemble'),
- 'sklearn_neighbors' : getattr(sklearn, 'neighbors'),
- 'sklearn_naive_bayes' : getattr(sklearn, 'naive_bayes'),
- 'sklearn_linear_model' : getattr(sklearn, 'linear_model'),
- 'sklearn_cluster' : getattr(sklearn, 'cluster'),
- 'sklearn_decomposition' : getattr(sklearn, 'decomposition'),
- 'sklearn_preprocessing' : getattr(sklearn, 'preprocessing'),
- 'sklearn_feature_selection' : getattr(sklearn, 'feature_selection'),
- 'sklearn_kernel_approximation' : getattr(sklearn, 'kernel_approximation'),
+ 'sklearn_svm': getattr(sklearn, 'svm'),
+ 'sklearn_tree': getattr(sklearn, 'tree'),
+ 'sklearn_ensemble': getattr(sklearn, 'ensemble'),
+ 'sklearn_neighbors': getattr(sklearn, 'neighbors'),
+ 'sklearn_naive_bayes': getattr(sklearn, 'naive_bayes'),
+ 'sklearn_linear_model': getattr(sklearn, 'linear_model'),
+ 'sklearn_cluster': getattr(sklearn, 'cluster'),
+ 'sklearn_decomposition': getattr(sklearn, 'decomposition'),
+ 'sklearn_preprocessing': getattr(sklearn, 'preprocessing'),
+ 'sklearn_feature_selection':
+ getattr(sklearn, 'feature_selection'),
+ 'sklearn_kernel_approximation':
+ getattr(sklearn, 'kernel_approximation'),
'skrebate_ReliefF': getattr(skrebate, 'ReliefF'),
'skrebate_SURF': getattr(skrebate, 'SURF'),
'skrebate_SURFstar': getattr(skrebate, 'SURFstar'),
'skrebate_MultiSURF': getattr(skrebate, 'MultiSURF'),
'skrebate_MultiSURFstar': getattr(skrebate, 'MultiSURFstar'),
'skrebate_TuRF': getattr(skrebate, 'TuRF'),
- 'xgboost_XGBClassifier' : getattr(xgboost, 'XGBClassifier'),
- 'xgboost_XGBRegressor' : getattr(xgboost, 'XGBRegressor')
+ 'xgboost_XGBClassifier': getattr(xgboost, 'XGBClassifier'),
+ 'xgboost_XGBRegressor': getattr(xgboost, 'XGBRegressor'),
+ 'imblearn_over_sampling': getattr(imblearn, 'over_sampling'),
+ 'imblearn_combine': getattr(imblearn, 'combine')
}
syms.update(estimator_table)
for key in unwanted:
syms.pop(key, None)
- super(SafeEval, self).__init__(symtable=syms, use_numpy=False, minimal=False,
- no_if=True, no_for=True, no_while=True, no_try=True,
- no_functiondef=True, no_ifexp=True, no_listcomp=False,
- no_augassign=False, no_assert=True, no_delete=True,
- no_raise=True, no_print=True)
-
+ super(SafeEval, self).__init__(
+ symtable=syms, use_numpy=False, minimal=False,
+ no_if=True, no_for=True, no_while=True, no_try=True,
+ no_functiondef=True, no_ifexp=True, no_listcomp=False,
+ no_augassign=False, no_assert=True, no_delete=True,
+ no_raise=True, no_print=True)
def get_estimator(estimator_json):
-
+ """Return a sklearn or compatible estimator from Galaxy tool inputs
+ """
estimator_module = estimator_json['selected_module']
- if estimator_module == 'customer_estimator':
+ if estimator_module == 'custom_estimator':
c_estimator = estimator_json['c_estimator']
with open(c_estimator, 'rb') as model_handler:
new_model = load_model(model_handler)
return new_model
+ if estimator_module == "binarize_target":
+ wrapped_estimator = estimator_json['wrapped_estimator']
+ with open(wrapped_estimator, 'rb') as model_handler:
+ wrapped_estimator = load_model(model_handler)
+ options = {}
+ if estimator_json['z_score'] is not None:
+ options['z_score'] = estimator_json['z_score']
+ if estimator_json['value'] is not None:
+ options['value'] = estimator_json['value']
+ options['less_is_positive'] = estimator_json['less_is_positive']
+ if estimator_json['clf_or_regr'] == 'BinarizeTargetClassifier':
+ klass = try_get_attr('iraps_classifier',
+ 'BinarizeTargetClassifier')
+ else:
+ klass = try_get_attr('iraps_classifier',
+ 'BinarizeTargetRegressor')
+ return klass(wrapped_estimator, **options)
+
estimator_cls = estimator_json['selected_estimator']
if estimator_module == 'xgboost':
- cls = getattr(xgboost, estimator_cls)
+ klass = getattr(xgboost, estimator_cls)
else:
module = getattr(sklearn, estimator_module)
- cls = getattr(module, estimator_cls)
+ klass = getattr(module, estimator_cls)
- estimator = cls()
+ estimator = klass()
estimator_params = estimator_json['text_params'].strip()
if estimator_params != '':
try:
+ safe_eval = SafeEval()
params = safe_eval('dict(' + estimator_params + ')')
except ValueError:
sys.exit("Unsupported parameter input: `%s`" % estimator_params)
@@ -301,9 +448,13 @@
def get_cv(cv_json):
- """
- cv_json:
- e.g.:
+ """ Return CV splitter from Galaxy tool inputs
+
+ Parameters
+ ----------
+ cv_json : dict
+ From Galaxy tool inputs.
+ e.g.:
{
'selected_cv': 'StratifiedKFold',
'n_splits': 3,
@@ -315,15 +466,25 @@
if cv == 'default':
return cv_json['n_splits'], None
- groups = cv_json.pop('groups', None)
- if groups:
- groups = groups.strip()
- if groups != '':
- if groups.startswith('__ob__'):
- groups = groups[6:]
- if groups.endswith('__cb__'):
- groups = groups[:-6]
- groups = [int(x.strip()) for x in groups.split(',')]
+ groups = cv_json.pop('groups_selector', None)
+ if groups is not None:
+ infile_g = groups['infile_g']
+ header = 'infer' if groups['header_g'] else None
+ column_option = (groups['column_selector_options_g']
+ ['selected_column_selector_option_g'])
+ if column_option in ['by_index_number', 'all_but_by_index_number',
+ 'by_header_name', 'all_but_by_header_name']:
+ c = groups['column_selector_options_g']['col_g']
+ else:
+ c = None
+ groups = read_columns(
+ infile_g,
+ c=c,
+ c_option=column_option,
+ sep='\t',
+ header=header,
+ parse_dates=True)
+ groups = groups.ravel()
for k, v in cv_json.items():
if v == '':
@@ -341,7 +502,12 @@
if test_size and test_size > 1.0:
cv_json['test_size'] = int(test_size)
- cv_class = getattr(model_selection, cv)
+ if cv == 'OrderedKFold':
+ cv_class = try_get_attr('model_validations', 'OrderedKFold')
+ elif cv == 'RepeatedOrderedKFold':
+ cv_class = try_get_attr('model_validations', 'RepeatedOrderedKFold')
+ else:
+ cv_class = getattr(model_selection, cv)
splitter = cv_class(**cv_json)
return splitter, groups
@@ -349,6 +515,9 @@
# needed when sklearn < v0.20
def balanced_accuracy_score(y_true, y_pred):
+ """Compute balanced accuracy score, which is now available in
+ scikit-learn from v0.20.0.
+ """
C = metrics.confusion_matrix(y_true, y_pred)
with np.errstate(divide='ignore', invalid='ignore'):
per_class = np.diag(C) / C.sum(axis=1)
@@ -360,21 +529,71 @@
def get_scoring(scoring_json):
-
+ """Return single sklearn scorer class
+ or multiple scorers in a dictionary
+ """
if scoring_json['primary_scoring'] == 'default':
return None
my_scorers = metrics.SCORERS
+ my_scorers['binarize_auc_scorer'] =\
+ try_get_attr('iraps_classifier', 'binarize_auc_scorer')
+ my_scorers['binarize_average_precision_scorer'] =\
+ try_get_attr('iraps_classifier', 'binarize_average_precision_scorer')
if 'balanced_accuracy' not in my_scorers:
- my_scorers['balanced_accuracy'] = metrics.make_scorer(balanced_accuracy_score)
+ my_scorers['balanced_accuracy'] =\
+ metrics.make_scorer(balanced_accuracy_score)
if scoring_json['secondary_scoring'] != 'None'\
- and scoring_json['secondary_scoring'] != scoring_json['primary_scoring']:
- scoring = {}
- scoring['primary'] = my_scorers[scoring_json['primary_scoring']]
+ and scoring_json['secondary_scoring'] !=\
+ scoring_json['primary_scoring']:
+ return_scoring = {}
+ primary_scoring = scoring_json['primary_scoring']
+ return_scoring[primary_scoring] = my_scorers[primary_scoring]
for scorer in scoring_json['secondary_scoring'].split(','):
if scorer != scoring_json['primary_scoring']:
- scoring[scorer] = my_scorers[scorer]
- return scoring
+ return_scoring[scorer] = my_scorers[scorer]
+ return return_scoring
return my_scorers[scoring_json['primary_scoring']]
+
+
+def get_search_params(estimator):
+ """Format the output of `estimator.get_params()`
+ """
+ params = estimator.get_params()
+ results = []
+ for k, v in params.items():
+ # params below won't be shown for search in the searchcv tool
+ keywords = ('n_jobs', 'pre_dispatch', 'memory', 'steps',
+ 'nthread', 'verbose')
+ if k.endswith(keywords):
+ results.append(['*', k, k+": "+repr(v)])
+ else:
+ results.append(['@', k, k+": "+repr(v)])
+ results.append(
+ ["", "Note:",
+ "@, params eligible for search in searchcv tool."])
+
+ return results
+
+
+def try_get_attr(module, name):
+ """Try to get an attribute from an already-imported custom module
+
+ Parameters
+ ----------
+ module : str
+ Module name
+ name : str
+ Attribute (class/function) name.
+
+ Returns
+ -------
+ class or function
+ """
+ mod = sys.modules.get(module, None)
+ if mod:
+ return getattr(mod, name)
+ else:
+ raise Exception("No module named %s." % module)