# HG changeset patch # User bgruening # Date 1565349136 14400 # Node ID 82b6104d4682ff56965532c49dfcce6194a87f1c # Parent 21d3e08b1a4818e818d758286d8c4dafb4dedd5c planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty diff -r 21d3e08b1a48 -r 82b6104d4682 feature_selectors.py --- a/feature_selectors.py Tue Jul 09 19:26:54 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,357 +0,0 @@ -""" -DyRFE -DyRFECV -MyPipeline -MyimbPipeline -check_feature_importances -""" -import numpy as np - -from imblearn import under_sampling, over_sampling, combine -from imblearn.pipeline import Pipeline as imbPipeline -from sklearn import (cluster, compose, decomposition, ensemble, - feature_extraction, feature_selection, - gaussian_process, kernel_approximation, - metrics, model_selection, naive_bayes, - neighbors, pipeline, preprocessing, - svm, linear_model, tree, discriminant_analysis) - -from sklearn.base import BaseEstimator -from sklearn.base import MetaEstimatorMixin, clone, is_classifier -from sklearn.feature_selection.rfe import _rfe_single_fit, RFE, RFECV -from sklearn.model_selection import check_cv -from sklearn.metrics.scorer import check_scoring -from sklearn.utils import check_X_y, safe_indexing, safe_sqr -from sklearn.utils._joblib import Parallel, delayed, effective_n_jobs - - -class DyRFE(RFE): - """ - Mainly used with DyRFECV - - Parameters - ---------- - estimator : object - A supervised learning estimator with a ``fit`` method that provides - information about feature importance either through a ``coef_`` - attribute or through a ``feature_importances_`` attribute. - n_features_to_select : int or None (default=None) - The number of features to select. If `None`, half of the features - are selected. - step : int, float or list, optional (default=1) - If greater than or equal to 1, then ``step`` corresponds to the - (integer) number of features to remove at each iteration. - If within (0.0, 1.0), then ``step`` corresponds to the percentage - (rounded down) of features to remove at each iteration. - If list, a series of step sizes (numbers of features) to remove at - each iteration. Iteration stops when the steps are exhausted. - verbose : int, (default=0) - Controls verbosity of output. 
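- - Examples - -------- - A minimal usage sketch (the data and estimator choices are - illustrative, not part of the original module):: - - >>> from sklearn.datasets import make_classification - >>> from sklearn.svm import SVC - >>> X, y = make_classification(n_samples=50, n_features=20, - ... random_state=0) - >>> selector = DyRFE(SVC(kernel='linear'), n_features_to_select=5, - ... step=[10, 3, 2]) - >>> selector = selector.fit(X, y) # eliminates 10, then 3, then 2 - >>> selector.support_ # boolean mask of the 5 surviving features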
- - """ - def __init__(self, estimator, n_features_to_select=None, step=1, - verbose=0): - super(DyRFE, self).__init__(estimator, n_features_to_select, - step, verbose) - - def _fit(self, X, y, step_score=None): - - if type(self.step) is not list: - return super(DyRFE, self)._fit(X, y, step_score) - - # dynamic step - X, y = check_X_y(X, y, "csc") - # Initialization - n_features = X.shape[1] - if self.n_features_to_select is None: - n_features_to_select = n_features // 2 - else: - n_features_to_select = self.n_features_to_select - - step = [] - for s in self.step: - if 0.0 < s < 1.0: - step.append(int(max(1, s * n_features))) - else: - step.append(int(s)) - if s <= 0: - raise ValueError("Step must be >0") - - support_ = np.ones(n_features, dtype=np.bool) - ranking_ = np.ones(n_features, dtype=np.int) - - if step_score: - self.scores_ = [] - - step_i = 0 - # Elimination - while np.sum(support_) > n_features_to_select and step_i < len(step): - - # if last step is 1, will keep loop - if step_i == len(step) - 1 and step[step_i] != 0: - step.append(step[step_i]) - - # Remaining features - features = np.arange(n_features)[support_] - - # Rank the remaining features - estimator = clone(self.estimator) - if self.verbose > 0: - print("Fitting estimator with %d features." % np.sum(support_)) - - estimator.fit(X[:, features], y) - - # Get coefs - if hasattr(estimator, 'coef_'): - coefs = estimator.coef_ - else: - coefs = getattr(estimator, 'feature_importances_', None) - if coefs is None: - raise RuntimeError('The classifier does not expose ' - '"coef_" or "feature_importances_" ' - 'attributes') - - # Get ranks - if coefs.ndim > 1: - ranks = np.argsort(safe_sqr(coefs).sum(axis=0)) - else: - ranks = np.argsort(safe_sqr(coefs)) - - # for sparse case ranks is matrix - ranks = np.ravel(ranks) - - # Eliminate the worse features - threshold =\ - min(step[step_i], np.sum(support_) - n_features_to_select) - - # Compute step score on the previous selection iteration - # because 'estimator' must use features - # that have not been eliminated yet - if step_score: - self.scores_.append(step_score(estimator, features)) - support_[features[ranks][:threshold]] = False - ranking_[np.logical_not(support_)] += 1 - - step_i += 1 - - # Set final attributes - features = np.arange(n_features)[support_] - self.estimator_ = clone(self.estimator) - self.estimator_.fit(X[:, features], y) - - # Compute step score when only n_features_to_select features left - if step_score: - self.scores_.append(step_score(self.estimator_, features)) - self.n_features_ = support_.sum() - self.support_ = support_ - self.ranking_ = ranking_ - - return self - - -class DyRFECV(RFECV, MetaEstimatorMixin): - """ - Compared with RFECV, DyRFECV offers flexiable `step` to eleminate - features, in the format of list, while RFECV supports only fixed number - of `step`. - - Parameters - ---------- - estimator : object - A supervised learning estimator with a ``fit`` method that provides - information about feature importance either through a ``coef_`` - attribute or through a ``feature_importances_`` attribute. - step : int or float, optional (default=1) - If greater than or equal to 1, then ``step`` corresponds to the - (integer) number of features to remove at each iteration. - If within (0.0, 1.0), then ``step`` corresponds to the percentage - (rounded down) of features to remove at each iteration. - If list, a series of step to remove at each iteration. 
iteration stops - when all steps are finished. - Note that the last iteration may remove fewer than ``step`` features in - order to reach ``min_features_to_select``. - min_features_to_select : int, (default=1) - The minimum number of features to be selected. This number of features - will always be scored, even if the difference between the original - feature count and ``min_features_to_select`` isn't divisible by - ``step``. - cv : int, cross-validation generator or an iterable, optional - Determines the cross-validation splitting strategy. - Possible inputs for cv are: - - None, to use the default 3-fold cross-validation, - - integer, to specify the number of folds. - - :term:`CV splitter`, - - An iterable yielding (train, test) splits as arrays of indices. - For integer/None inputs, if ``y`` is binary or multiclass, - :class:`sklearn.model_selection.StratifiedKFold` is used. If the - estimator is a classifier or if ``y`` is neither binary nor multiclass, - :class:`sklearn.model_selection.KFold` is used. - Refer to the :ref:`User Guide <cross_validation>` for the various - cross-validation strategies that can be used here. - .. versionchanged:: 0.20 - ``cv`` default value of None will change from 3-fold to 5-fold - in v0.22. - scoring : string, callable or None, optional, (default=None) - A string (see model evaluation documentation) or - a scorer callable object / function with signature - ``scorer(estimator, X, y)``. - verbose : int, (default=0) - Controls verbosity of output. - n_jobs : int or None, optional (default=None) - Number of cores to run in parallel while fitting across folds. - ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. - ``-1`` means using all processors. See :term:`Glossary <n_jobs>` - for more details. - """ - def __init__(self, estimator, step=1, min_features_to_select=1, cv='warn', - scoring=None, verbose=0, n_jobs=None): - super(DyRFECV, self).__init__( - estimator, step=step, - min_features_to_select=min_features_to_select, - cv=cv, scoring=scoring, verbose=verbose, - n_jobs=n_jobs) - - def fit(self, X, y, groups=None): - """Fit the RFE model and automatically tune the number of selected - features. - Parameters - ---------- - X : {array-like, sparse matrix}, shape = [n_samples, n_features] - Training vector, where `n_samples` is the number of samples and - `n_features` is the total number of features. - y : array-like, shape = [n_samples] - Target values (integers for classification, real numbers for - regression). - groups : array-like, shape = [n_samples], optional - Group labels for the samples used while splitting the dataset into - train/test set. - """ - if type(self.step) is not list: - return super(DyRFECV, self).fit(X, y, groups) - - X, y = check_X_y(X, y, "csr") - - # Initialization - cv = check_cv(self.cv, y, is_classifier(self.estimator)) - scorer = check_scoring(self.estimator, scoring=self.scoring) - n_features = X.shape[1] - - step = [] - for s in self.step: - if 0.0 < s < 1.0: - step.append(int(max(1, s * n_features))) - else: - step.append(int(s)) - if s <= 0: - raise ValueError("Step must be >0") - - # Build an RFE object, which will evaluate and score each possible - # feature count, down to self.min_features_to_select - rfe = DyRFE(estimator=self.estimator, - n_features_to_select=self.min_features_to_select, - step=self.step, verbose=self.verbose) - - # Determine the number of subsets of features by fitting across - # the train folds and choosing the "features_to_select" parameter - # that gives the least averaged error across all folds. 
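- - # Illustration (hypothetical numbers, not from the original code): - # with n_features=100 and step=[50, 25], the step list is padded - # below to [50, 25, 25, ...], so the candidate feature counts scored - # per fold are 100, 50, 25, ... The best count is then recovered - # from the summed scores as - # n_features_to_select = max(n_features - sum(step[:argmax_idx]), - # min_features_to_select).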
- - # Note that joblib raises a non-picklable error for bound methods - # even if n_jobs is set to 1 with the default multiprocessing - # backend. - # This branching is done to make sure that user code that sets - # n_jobs to 1 and provides bound methods as scorers is not broken - # with the addition of the n_jobs parameter in version 0.18. - - if effective_n_jobs(self.n_jobs) == 1: - parallel, func = list, _rfe_single_fit - else: - parallel = Parallel(n_jobs=self.n_jobs) - func = delayed(_rfe_single_fit) - - scores = parallel( - func(rfe, self.estimator, X, y, train, test, scorer) - for train, test in cv.split(X, y, groups)) - - scores = np.sum(scores, axis=0) - diff = int(scores.shape[0]) - len(step) - if diff > 0: - step = np.r_[step, [step[-1]] * diff] - scores_rev = scores[::-1] - argmax_idx = len(scores) - np.argmax(scores_rev) - 1 - n_features_to_select = max( - n_features - sum(step[:argmax_idx]), - self.min_features_to_select) - - # Re-execute an elimination with best_k over the whole set - rfe = DyRFE(estimator=self.estimator, - n_features_to_select=n_features_to_select, step=self.step, - verbose=self.verbose) - - rfe.fit(X, y) - - # Set final attributes - self.support_ = rfe.support_ - self.n_features_ = rfe.n_features_ - self.ranking_ = rfe.ranking_ - self.estimator_ = clone(self.estimator) - self.estimator_.fit(self.transform(X), y) - - # Fix a normalization error: n equals get_n_splits(X, y) - 1 here, - # so the scores are normalized by get_n_splits(X, y) - self.grid_scores_ = scores[::-1] / cv.get_n_splits(X, y, groups) - return self - - -class MyPipeline(pipeline.Pipeline): - """ - Extend the pipeline object to expose a feature_importances_ attribute - """ - def fit(self, X, y=None, **fit_params): - super(MyPipeline, self).fit(X, y, **fit_params) - estimator = self.steps[-1][-1] - if hasattr(estimator, 'coef_'): - coefs = estimator.coef_ - else: - coefs = getattr(estimator, 'feature_importances_', None) - if coefs is None: - raise RuntimeError('The estimator in the pipeline does not expose ' - '"coef_" or "feature_importances_" ' - 'attributes') - self.feature_importances_ = coefs - return self - - -class MyimbPipeline(imbPipeline): - """ - Extend the imbalanced-learn pipeline object to expose a - feature_importances_ attribute - """ - def fit(self, X, y=None, **fit_params): - super(MyimbPipeline, self).fit(X, y, **fit_params) - estimator = self.steps[-1][-1] - if hasattr(estimator, 'coef_'): - coefs = estimator.coef_ - else: - coefs = getattr(estimator, 'feature_importances_', None) - if coefs is None: - raise RuntimeError('The estimator in the pipeline does not expose ' - '"coef_" or "feature_importances_" ' - 'attributes') - self.feature_importances_ = coefs - return self - - -def check_feature_importances(estimator): - """ - For a pipeline object which has no feature_importances_ attribute, - this function returns the same configured pipeline object with the - last estimator's feature_importances_ attached. 
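- - Example (a sketch; the pipeline below is illustrative):: - - from sklearn import decomposition, pipeline, svm - pipe = pipeline.Pipeline([('pca', decomposition.PCA()), - ('svc', svm.SVC(kernel='linear'))]) - pipe = check_feature_importances(pipe) - # returns a MyPipeline exposing feature_importances_ after fit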
- """ - if estimator.__class__.__module__ == 'sklearn.pipeline': - pipeline_steps = estimator.get_params()['steps'] - estimator = MyPipeline(pipeline_steps) - elif estimator.__class__.__module__ == 'imblearn.pipeline': - pipeline_steps = estimator.get_params()['steps'] - estimator = MyimbPipeline(pipeline_steps) - else: - return estimator diff -r 21d3e08b1a48 -r 82b6104d4682 iraps_classifier.py --- a/iraps_classifier.py Tue Jul 09 19:26:54 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,569 +0,0 @@ -""" -class IRAPSCore -class IRAPSClassifier -class BinarizeTargetClassifier -class BinarizeTargetRegressor -class _BinarizeTargetScorer -class _BinarizeTargetProbaScorer - -binarize_auc_scorer -binarize_average_precision_scorer - -binarize_accuracy_scorer -binarize_balanced_accuracy_scorer -binarize_precision_scorer -binarize_recall_scorer -""" - - -import numpy as np -import random -import warnings - -from abc import ABCMeta -from scipy.stats import ttest_ind -from sklearn import metrics -from sklearn.base import BaseEstimator, clone, RegressorMixin -from sklearn.externals import six -from sklearn.feature_selection.univariate_selection import _BaseFilter -from sklearn.metrics.scorer import _BaseScorer -from sklearn.pipeline import Pipeline -from sklearn.utils import as_float_array, check_X_y -from sklearn.utils._joblib import Parallel, delayed -from sklearn.utils.validation import (check_array, check_is_fitted, - check_memory, column_or_1d) - - -VERSION = '0.1.1' - - -class IRAPSCore(six.with_metaclass(ABCMeta, BaseEstimator)): - """ - Base class of IRAPSClassifier - From sklearn BaseEstimator: - get_params() - set_params() - - Parameters - ---------- - n_iter : int - sample count - - positive_thres : float - z_score shreshold to discretize positive target values - - negative_thres : float - z_score threshold to discretize negative target values - - verbose : int - 0 or geater, if not 0, print progress - - n_jobs : int, default=1 - The number of CPUs to use to do the computation. - - pre_dispatch : int, or string. - Controls the number of jobs that get dispatched during parallel - execution. Reducing this number can be useful to avoid an - explosion of memory consumption when more jobs get dispatched - than CPUs can process. This parameter can be: - - None, in which case all the jobs are immediately - created and spawned. Use this for lightweight and - fast-running jobs, to avoid delays due to on-demand - spawning of the jobs - - An int, giving the exact number of total jobs that are - spawned - - A string, giving an expression as a function of n_jobs, - as in '2*n_jobs' - - random_state : int or None - """ - - def __init__(self, n_iter=1000, positive_thres=-1, negative_thres=0, - verbose=0, n_jobs=1, pre_dispatch='2*n_jobs', - random_state=None): - """ - IRAPS turns towwards general Anomaly Detection - It comapares positive_thres with negative_thres, - and decide which portion is the positive target. - e.g.: - (positive_thres=-1, negative_thres=0) - => positive = Z_score of target < -1 - (positive_thres=1, negative_thres=0) - => positive = Z_score of target > 1 - - Note: The positive targets here is always the - abnormal minority group. 
- """ - self.n_iter = n_iter - self.positive_thres = positive_thres - self.negative_thres = negative_thres - self.verbose = verbose - self.n_jobs = n_jobs - self.pre_dispatch = pre_dispatch - self.random_state = random_state - - def fit(self, X, y): - """ - X: array-like (n_samples x n_features) - y: 1-d array-like (n_samples) - """ - X, y = check_X_y(X, y, ['csr', 'csc'], multi_output=False) - - def _stochastic_sampling(X, y, random_state=None, positive_thres=-1, - negative_thres=0): - # each iteration select a random number of random subset of - # training samples. this is somewhat different from the original - # IRAPS method, but effect is almost the same. - SAMPLE_SIZE = [0.25, 0.75] - n_samples = X.shape[0] - - if random_state is None: - n_select = random.randint(int(n_samples * SAMPLE_SIZE[0]), - int(n_samples * SAMPLE_SIZE[1])) - index = random.sample(list(range(n_samples)), n_select) - else: - n_select = random.Random(random_state).randint( - int(n_samples * SAMPLE_SIZE[0]), - int(n_samples * SAMPLE_SIZE[1])) - index = random.Random(random_state).sample( - list(range(n_samples)), n_select) - - X_selected, y_selected = X[index], y[index] - - # Spliting by z_scores. - y_selected = (y_selected - y_selected.mean()) / y_selected.std() - if positive_thres < negative_thres: - X_selected_positive = X_selected[y_selected < positive_thres] - X_selected_negative = X_selected[y_selected > negative_thres] - else: - X_selected_positive = X_selected[y_selected > positive_thres] - X_selected_negative = X_selected[y_selected < negative_thres] - - # For every iteration, at least 5 responders are selected - if X_selected_positive.shape[0] < 5: - warnings.warn("Warning: fewer than 5 positives were selected!") - return - - # p_values - _, p = ttest_ind(X_selected_positive, X_selected_negative, - axis=0, equal_var=False) - - # fold_change == mean change? - # TODO implement other normalization method - positive_mean = X_selected_positive.mean(axis=0) - negative_mean = X_selected_negative.mean(axis=0) - mean_change = positive_mean - negative_mean - # mean_change = np.select( - # [positive_mean > negative_mean, - # positive_mean < negative_mean], - # [positive_mean / negative_mean, - # -negative_mean / positive_mean]) - # mean_change could be adjusted by power of 2 - # mean_change = 2**mean_change \ - # if mean_change>0 else -2**abs(mean_change) - - return p, mean_change, negative_mean - - parallel = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, - pre_dispatch=self.pre_dispatch) - if self.random_state is None: - res = parallel(delayed(_stochastic_sampling)( - X, y, random_state=None, - positive_thres=self.positive_thres, - negative_thres=self.negative_thres) - for i in range(self.n_iter)) - else: - res = parallel(delayed(_stochastic_sampling)( - X, y, random_state=seed, - positive_thres=self.positive_thres, - negative_thres=self.negative_thres) - for seed in range(self.random_state, - self.random_state+self.n_iter)) - res = [_ for _ in res if _] - if len(res) < 50: - raise ValueError("too few (%d) valid feature lists " - "were generated!" 
% len(res)) - pvalues = np.vstack([x[0] for x in res]) - fold_changes = np.vstack([x[1] for x in res]) - base_values = np.vstack([x[2] for x in res]) - - self.pvalues_ = np.asarray(pvalues) - self.fold_changes_ = np.asarray(fold_changes) - self.base_values_ = np.asarray(base_values) - - return self - - -def _iraps_core_fit(iraps_core, X, y): - return iraps_core.fit(X, y) - - -class IRAPSClassifier(six.with_metaclass(ABCMeta, _BaseFilter, - BaseEstimator, RegressorMixin)): - """ - Extend the bases of both sklearn feature_selector and classifier. - From sklearn BaseEstimator: - get_params() - set_params() - From sklearn _BaseFilter: - get_support() - fit_transform(X) - transform(X) - From sklearn RegressorMixin: - score(X, y): R2 - New: - predict(X) - predict_label(X) - get_signature() - Properties: - discretize_value - - Parameters - ---------- - iraps_core: object - p_thres: float, threshold for p_values - fc_thres: float, threshold for fold change or mean difference - occurrence: float, minimum occurrence rate, across iterations, of a - feature passing p_thres and fc_thres - discretize: float, threshold of z_score to discretize target value - memory: None, str or joblib.Memory object - min_signature_features: int, the minimum number of features in a signature - """ - - def __init__(self, iraps_core, p_thres=1e-4, fc_thres=0.1, - occurrence=0.8, discretize=-1, memory=None, - min_signature_features=1): - self.iraps_core = iraps_core - self.p_thres = p_thres - self.fc_thres = fc_thres - self.occurrence = occurrence - self.discretize = discretize - self.memory = memory - self.min_signature_features = min_signature_features - - def fit(self, X, y): - memory = check_memory(self.memory) - cached_fit = memory.cache(_iraps_core_fit) - iraps_core = clone(self.iraps_core) - # allow pre-fitted iraps_core here - if not hasattr(iraps_core, 'pvalues_'): - iraps_core = cached_fit(iraps_core, X, y) - self.iraps_core_ = iraps_core - - pvalues = as_float_array(iraps_core.pvalues_, copy=True) - # NaN p-values can occur; treat them as maximally non-significant - pvalues[np.isnan(pvalues)] = np.finfo(pvalues.dtype).max - - fold_changes = as_float_array(iraps_core.fold_changes_, copy=True) - fold_changes[np.isnan(fold_changes)] = 0.0 - - base_values = as_float_array(iraps_core.base_values_, copy=True) - - p_thres = self.p_thres - fc_thres = self.fc_thres - occurrence = self.occurrence - - mask_0 = np.zeros(pvalues.shape, dtype=np.int32) - # mark p_values at or below the threshold - mask_0[pvalues <= p_thres] = 1 - # keep the marks only where the absolute fold_change meets the - # threshold - mask_0[abs(fold_changes) < fc_thres] = 0 - - # count occurrences and mask features meeting the occurrence threshold - counts = mask_0.sum(axis=0) - occurrence_thres = int(occurrence * iraps_core.n_iter) - mask = np.zeros(counts.shape, dtype=bool) - mask[counts >= occurrence_thres] = 1 - - # generate signature - fold_changes[mask_0 == 0] = 0.0 - signature = fold_changes[:, mask].sum(axis=0) / counts[mask] - signature = np.vstack((signature, base_values[:, mask].mean(axis=0))) - # It's not clear whether min_signature_features could impact - # prediction performance - if signature is None\ - or signature.shape[1] < self.min_signature_features: - raise ValueError("The classifier got None signature or the number " - "of signature features is less than the minimum!") - - self.signature_ = np.asarray(signature) - self.mask_ = mask - # TODO: support other discretization methods: fixed value, upper - # third quartile, etc. 
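- # e.g., with the default discretize=-1 the cutoff below is - # y.mean() - y.std(); whether smaller or larger target values count - # as positive follows the iraps_core thresholds via less_is_positive.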
- - self.discretize_value = y.mean() + y.std() * self.discretize - if iraps_core.negative_thres > iraps_core.positive_thres: - self.less_is_positive = True - else: - self.less_is_positive = False - - return self - - def _get_support_mask(self): - """ - return mask of feature selection indices - """ - check_is_fitted(self, 'mask_') - - return self.mask_ - - def get_signature(self): - """ - return signature - """ - check_is_fitted(self, 'signature_') - - return self.signature_ - - def predict(self, X): - """ - compute the correlation coefficient with the iraps signature - """ - signature = self.get_signature() - - X = as_float_array(X) - X_transformed = self.transform(X) - signature[1] - corrcoef = np.array( - [np.corrcoef(signature[0], e)[0][1] for e in X_transformed]) - corrcoef[np.isnan(corrcoef)] = np.finfo(np.float32).min - - return corrcoef - - def predict_label(self, X, clf_cutoff=0.4): - return self.predict(X) >= clf_cutoff - - -class BinarizeTargetClassifier(BaseEstimator, RegressorMixin): - """ - Convert continuous target to binary labels (True and False) - and apply a classification estimator. - - Parameters - ---------- - classifier: object - Estimator object such as derived from sklearn `ClassifierMixin`. - - z_score: float, default=-1.0 - Threshold value based on z_score. Will be ignored when - `value` is set - - value: float, default=None - Threshold value - - less_is_positive: boolean, default=True - When the target is less than the threshold value, it is converted - to True, otherwise to False. - - Attributes - ---------- - classifier_: object - Fitted classifier - - discretize_value: float - The threshold value used to discretize True and False targets - """ - - def __init__(self, classifier, z_score=-1, value=None, - less_is_positive=True): - self.classifier = classifier - self.z_score = z_score - self.value = value - self.less_is_positive = less_is_positive - - def fit(self, X, y, sample_weight=None): - """ - Convert y to True and False labels and then fit the classifier - with X and new y - - Returns - ------- - self: object - """ - y = check_array(y, accept_sparse=False, force_all_finite=True, - ensure_2d=False, dtype='numeric') - y = column_or_1d(y) - - if self.value is None: - discretize_value = y.mean() + y.std() * self.z_score - else: - discretize_value = self.value - self.discretize_value = discretize_value - - if self.less_is_positive: - y_trans = y < discretize_value - else: - y_trans = y > discretize_value - - self.classifier_ = clone(self.classifier) - - if sample_weight is not None: - self.classifier_.fit(X, y_trans, sample_weight=sample_weight) - else: - self.classifier_.fit(X, y_trans) - - if hasattr(self.classifier_, 'feature_importances_'): - self.feature_importances_ = self.classifier_.feature_importances_ - if hasattr(self.classifier_, 'coef_'): - self.coef_ = self.classifier_.coef_ - if hasattr(self.classifier_, 'n_outputs_'): - self.n_outputs_ = self.classifier_.n_outputs_ - if hasattr(self.classifier_, 'n_features_'): - self.n_features_ = self.classifier_.n_features_ - - return self - - def predict(self, X): - """ - Predict the positive-class probability of X. 
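- - A usage sketch (the wrapped estimator is illustrative):: - - from sklearn.linear_model import LogisticRegression - clf = BinarizeTargetClassifier(LogisticRegression(), z_score=-1) - clf.fit(X, y) # y is continuous and binarized internally - clf.predict(X) # positive-class probabilities, shape (n_samples,)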
- """ - check_is_fitted(self, 'classifier_') - proba = self.classifier_.predict_proba(X) - return proba[:, 1] - - def predict_label(self, X): - """Predict class label of X - """ - check_is_fitted(self, 'classifier_') - return self.classifier_.predict(X) - - -class _BinarizeTargetProbaScorer(_BaseScorer): - """ - base class to make binarized target specific scorer - """ - - def __call__(self, clf, X, y, sample_weight=None): - clf_name = clf.__class__.__name__ - # support pipeline object - if isinstance(clf, Pipeline): - main_estimator = clf.steps[-1][-1] - # support stacking ensemble estimators - # TODO support nested pipeline/stacking estimators - elif clf_name in ['StackingCVClassifier', 'StackingClassifier']: - main_estimator = clf.meta_clf_ - elif clf_name in ['StackingCVRegressor', 'StackingRegressor']: - main_estimator = clf.meta_regr_ - else: - main_estimator = clf - - discretize_value = main_estimator.discretize_value - less_is_positive = main_estimator.less_is_positive - - if less_is_positive: - y_trans = y < discretize_value - else: - y_trans = y > discretize_value - - y_pred = clf.predict(X) - if sample_weight is not None: - return self._sign * self._score_func(y_trans, y_pred, - sample_weight=sample_weight, - **self._kwargs) - else: - return self._sign * self._score_func(y_trans, y_pred, - **self._kwargs) - - -# roc_auc -binarize_auc_scorer =\ - _BinarizeTargetProbaScorer(metrics.roc_auc_score, 1, {}) - -# average_precision_scorer -binarize_average_precision_scorer =\ - _BinarizeTargetProbaScorer(metrics.average_precision_score, 1, {}) - -# roc_auc_scorer -iraps_auc_scorer = binarize_auc_scorer - -# average_precision_scorer -iraps_average_precision_scorer = binarize_average_precision_scorer - - -class BinarizeTargetRegressor(BaseEstimator, RegressorMixin): - """ - Extend regression estimator to have discretize_value - - Parameters - ---------- - regressor: object - Estimator object such as derived from sklearn `RegressionMixin`. - - z_score: float, default=-1.0 - Threshold value based on z_score. Will be ignored when - fixed_value is set - - value: float, default=None - Threshold value - - less_is_positive: boolean, default=True - When target is less the threshold value, it will be converted - to True, False otherwise. 
- - Attributes - ---------- - regressor_: object - Fitted regressor - - discretize_value: float - The threshold value used to discretize True and False targets - """ - - def __init__(self, regressor, z_score=-1, value=None, - less_is_positive=True): - self.regressor = regressor - self.z_score = z_score - self.value = value - self.less_is_positive = less_is_positive - - def fit(self, X, y, sample_weight=None): - """ - Calculate the discretize_value and fit the regressor with training - data - - Returns - ------- - self: object - """ - y = check_array(y, accept_sparse=False, force_all_finite=True, - ensure_2d=False, dtype='numeric') - y = column_or_1d(y) - - if self.value is None: - discretize_value = y.mean() + y.std() * self.z_score - else: - discretize_value = self.value - self.discretize_value = discretize_value - - self.regressor_ = clone(self.regressor) - - if sample_weight is not None: - self.regressor_.fit(X, y, sample_weight=sample_weight) - else: - self.regressor_.fit(X, y) - - # attach fitted regressor attributes - if hasattr(self.regressor_, 'feature_importances_'): - self.feature_importances_ = self.regressor_.feature_importances_ - if hasattr(self.regressor_, 'coef_'): - self.coef_ = self.regressor_.coef_ - if hasattr(self.regressor_, 'n_outputs_'): - self.n_outputs_ = self.regressor_.n_outputs_ - if hasattr(self.regressor_, 'n_features_'): - self.n_features_ = self.regressor_.n_features_ - - return self - - def predict(self, X): - """Predict target value of X - """ - check_is_fitted(self, 'regressor_') - y_pred = self.regressor_.predict(X) - if not np.all((y_pred >= 0) & (y_pred <= 1)): - y_pred = (y_pred - y_pred.min()) / (y_pred.max() - y_pred.min()) - if self.less_is_positive: - y_pred = 1 - y_pred - return y_pred - - -# roc_auc_scorer -regression_auc_scorer = binarize_auc_scorer - -# average_precision_scorer -regression_average_precision_scorer = binarize_average_precision_scorer diff -r 21d3e08b1a48 -r 82b6104d4682 keras_deep_learning.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/keras_deep_learning.py Fri Aug 09 07:12:16 2019 -0400 @@ -0,0 +1,359 @@ +import argparse +import json +import keras +import pandas as pd +import pickle +import six +import warnings + +from ast import literal_eval +from keras.models import Sequential, Model +from galaxy_ml.utils import try_get_attr, get_search_params + + +def _handle_shape(literal): + """Eval integer or list/tuple of integers from string + + Parameters + ---------- + literal : str. + """ + literal = literal.strip() + if not literal: + return None + try: + return literal_eval(literal) + except (ValueError, SyntaxError) as e: + print(e) + return literal + + +def _handle_regularizer(literal): + """Construct regularizer from string literal + + Parameters + ---------- + literal : str. E.g. '(0.1, 0)' + """ + literal = literal.strip() + if not literal: + return None + + l1, l2 = literal_eval(literal) + + if not l1 and not l2: + return None + + if l1 is None: + l1 = 0. + if l2 is None: + l2 = 0. + + return keras.regularizers.l1_l2(l1=l1, l2=l2) + + +def _handle_constraint(config): + """Construct constraint from galaxy tool parameters. + Assumes a correct dictionary format + + Parameters + ---------- + config : dict. E.g. 
+ "bias_constraint": + {"constraint_options": + {"max_value":1.0, + "min_value":0.0, + "axis":"[0, 1, 2]" + }, + "constraint_type": + "MinMaxNorm" + } + """ + constraint_type = config['constraint_type'] + if constraint_type == 'None': + return None + + klass = getattr(keras.constraints, constraint_type) + options = config.get('constraint_options', {}) + if 'axis' in options: + options['axis'] = literal_eval(options['axis']) + + return klass(**options) + + +def _handle_lambda(literal): + return None + + +def _handle_layer_parameters(params): + """Access to handle all kinds of parameters + """ + for key, value in six.iteritems(params): + if value == 'None': + params[key] = None + continue + + if type(value) in [int, float, bool]\ + or (type(value) is str and value.isalpha()): + continue + + if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape', + 'target_shape', 'dims', 'kernel_size', 'strides', + 'dilation_rate', 'output_padding', 'cropping', 'size', + 'padding', 'pool_size', 'axis', 'shared_axes']: + params[key] = _handle_shape(value) + + elif key.endswith('_regularizer'): + params[key] = _handle_regularizer(value) + + elif key.endswith('_constraint'): + params[key] = _handle_constraint(value) + + elif key == 'function': # No support for lambda/function eval + params.pop(key) + + return params + + +def get_sequential_model(config): + """Construct keras Sequential model from Galaxy tool parameters + + Parameters: + ----------- + config : dictionary, galaxy tool parameters loaded by JSON + """ + model = Sequential() + input_shape = _handle_shape(config['input_shape']) + layers = config['layers'] + for layer in layers: + options = layer['layer_selection'] + layer_type = options.pop('layer_type') + klass = getattr(keras.layers, layer_type) + other_options = options.pop('layer_options', {}) + options.update(other_options) + + # parameters needs special care + options = _handle_layer_parameters(options) + + # add input_shape to the first layer only + if not getattr(model, '_layers') and input_shape is not None: + options['input_shape'] = input_shape + + model.add(klass(**options)) + + return model + + +def get_functional_model(config): + """Construct keras functional model from Galaxy tool parameters + + Parameters + ----------- + config : dictionary, galaxy tool parameters loaded by JSON + """ + layers = config['layers'] + all_layers = [] + for layer in layers: + options = layer['layer_selection'] + layer_type = options.pop('layer_type') + klass = getattr(keras.layers, layer_type) + inbound_nodes = options.pop('inbound_nodes', None) + other_options = options.pop('layer_options', {}) + options.update(other_options) + + # parameters needs special care + options = _handle_layer_parameters(options) + # merge layers + if 'merging_layers' in options: + idxs = literal_eval(options.pop('merging_layers')) + merging_layers = [all_layers[i-1] for i in idxs] + new_layer = klass(**options)(merging_layers) + # non-input layers + elif inbound_nodes is not None: + new_layer = klass(**options)(all_layers[inbound_nodes-1]) + # input layers + else: + new_layer = klass(**options) + + all_layers.append(new_layer) + + input_indexes = _handle_shape(config['input_layers']) + input_layers = [all_layers[i-1] for i in input_indexes] + + output_indexes = _handle_shape(config['output_layers']) + output_layers = [all_layers[i-1] for i in output_indexes] + + return Model(inputs=input_layers, outputs=output_layers) + + +def get_batch_generator(config): + """Construct keras online data generator from Galaxy tool 
parameters + + Parameters + ---------- + config : dictionary, galaxy tool parameters loaded from JSON + """ + generator_type = config.pop('generator_type') + klass = try_get_attr('galaxy_ml.preprocessors', generator_type) + + if generator_type == 'GenomicIntervalBatchGenerator': + config['ref_genome_path'] = 'to_be_determined' + config['intervals_path'] = 'to_be_determined' + config['target_path'] = 'to_be_determined' + config['features'] = 'to_be_determined' + else: + config['fasta_path'] = 'to_be_determined' + + return klass(**config) + + +def config_keras_model(inputs, outfile): + """ Configure keras model layers and output JSON + + Parameters + ---------- + inputs : dict + loaded galaxy tool parameters from `keras_model_config` + tool. + outfile : str + Path to galaxy dataset containing keras model JSON. + """ + model_type = inputs['model_selection']['model_type'] + layers_config = inputs['model_selection'] + + if model_type == 'sequential': + model = get_sequential_model(layers_config) + else: + model = get_functional_model(layers_config) + + json_string = model.to_json() + + with open(outfile, 'w') as f: + f.write(json_string) + + +def build_keras_model(inputs, outfile, model_json, infile_weights=None, + batch_mode=False, outfile_params=None): + """ for `keras_model_builder` tool + + Parameters + ---------- + inputs : dict + loaded galaxy tool parameters from `keras_model_builder` tool. + outfile : str + Path to galaxy dataset containing the keras_galaxy model output. + model_json : str + Path to dataset containing keras model JSON. + infile_weights : str or None + If string, path to dataset containing model weights. + batch_mode : bool, default=False + Whether to build an online batch classifier. + outfile_params : str, default=None + File path to search parameters output. 
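+ + Example (a sketch; the paths and the loaded `inputs` dict are + hypothetical):: + + build_keras_model(inputs, outfile='keras_model.pkl', + model_json='model.json', batch_mode=False, + outfile_params='params.tsv')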
+ """ + with open(model_json, 'r') as f: + json_model = json.load(f) + + config = json_model['config'] + + options = {} + + if json_model['class_name'] == 'Sequential': + options['model_type'] = 'sequential' + klass = Sequential + elif json_model['class_name'] == 'Model': + options['model_type'] = 'functional' + klass = Model + else: + raise ValueError("Unknow Keras model class: %s" + % json_model['class_name']) + + # load prefitted model + if inputs['mode_selection']['mode_type'] == 'prefitted': + estimator = klass.from_config(config) + estimator.load_weights(infile_weights) + # build train model + else: + cls_name = inputs['mode_selection']['learning_type'] + klass = try_get_attr('galaxy_ml.keras_galaxy_models', cls_name) + + options['loss'] = (inputs['mode_selection'] + ['compile_params']['loss']) + options['optimizer'] =\ + (inputs['mode_selection']['compile_params'] + ['optimizer_selection']['optimizer_type']).lower() + + options.update((inputs['mode_selection']['compile_params'] + ['optimizer_selection']['optimizer_options'])) + + train_metrics = (inputs['mode_selection']['compile_params'] + ['metrics']).split(',') + if train_metrics[-1] == 'none': + train_metrics = train_metrics[:-1] + options['metrics'] = train_metrics + + options.update(inputs['mode_selection']['fit_params']) + options['seed'] = inputs['mode_selection']['random_seed'] + + if batch_mode: + generator = get_batch_generator(inputs['mode_selection'] + ['generator_selection']) + options['data_batch_generator'] = generator + options['prediction_steps'] = \ + inputs['mode_selection']['prediction_steps'] + options['class_positive_factor'] = \ + inputs['mode_selection']['class_positive_factor'] + estimator = klass(config, **options) + if outfile_params: + hyper_params = get_search_params(estimator) + # TODO: remove this after making `verbose` tunable + for h_param in hyper_params: + if h_param[1].endswith('verbose'): + h_param[0] = '@' + df = pd.DataFrame(hyper_params, columns=['', 'Parameter', 'Value']) + df.to_csv(outfile_params, sep='\t', index=False) + + print(repr(estimator)) + # save model by pickle + with open(outfile, 'wb') as f: + pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL) + + +if __name__ == '__main__': + warnings.simplefilter('ignore') + + aparser = argparse.ArgumentParser() + aparser.add_argument("-i", "--inputs", dest="inputs", required=True) + aparser.add_argument("-m", "--model_json", dest="model_json") + aparser.add_argument("-t", "--tool_id", dest="tool_id") + aparser.add_argument("-w", "--infile_weights", dest="infile_weights") + aparser.add_argument("-o", "--outfile", dest="outfile") + aparser.add_argument("-p", "--outfile_params", dest="outfile_params") + args = aparser.parse_args() + + input_json_path = args.inputs + with open(input_json_path, 'r') as param_handler: + inputs = json.load(param_handler) + + tool_id = args.tool_id + outfile = args.outfile + outfile_params = args.outfile_params + model_json = args.model_json + infile_weights = args.infile_weights + + # for keras_model_config tool + if tool_id == 'keras_model_config': + config_keras_model(inputs, outfile) + + # for keras_model_builder tool + else: + batch_mode = False + if tool_id == 'keras_batch_models': + batch_mode = True + + build_keras_model(inputs=inputs, + model_json=model_json, + infile_weights=infile_weights, + batch_mode=batch_mode, + outfile=outfile, + outfile_params=outfile_params) diff -r 21d3e08b1a48 -r 82b6104d4682 keras_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/keras_macros.xml Fri Aug 09 07:12:16 
2019 -0400 @@ -0,0 +1,1228 @@ +<!-- [keras_macros.xml: 1228 lines of new Keras macro definitions; version token 0.4.0; the XML markup was lost in extraction, only the citation entries below survive] --> + @misc{chollet2015keras, + title={Keras}, + url={https://keras.io}, + author={Chollet, Fran\c{c}ois and others}, + year={2015}, + howpublished={https://keras.io}, + } + @misc{tensorflow2015-whitepaper, + title={ {TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems}, + url={https://www.tensorflow.org/}, + note={Software available from tensorflow.org}, + author={ + Mart\'{\i}n~Abadi and + Ashish~Agarwal and + Paul~Barham and + Eugene~Brevdo and + Zhifeng~Chen and + Craig~Citro and + Greg~S.~Corrado and + Andy~Davis and + Jeffrey~Dean and + Matthieu~Devin and + Sanjay~Ghemawat and + Ian~Goodfellow and + Andrew~Harp and + Geoffrey~Irving and + Michael~Isard and + Yangqing Jia and + Rafal~Jozefowicz and + Lukasz~Kaiser and + Manjunath~Kudlur and + Josh~Levenberg and + Dandelion~Man\'{e} and + Rajat~Monga and + Sherry~Moore and + Derek~Murray and + Chris~Olah and + Mike~Schuster and + Jonathon~Shlens and + Benoit~Steiner and + Ilya~Sutskever and + Kunal~Talwar and + Paul~Tucker and + Vincent~Vanhoucke and + Vijay~Vasudevan and + Fernanda~Vi\'{e}gas and + Oriol~Vinyals and + Pete~Warden and + Martin~Wattenberg and + Martin~Wicke and + Yuan~Yu and + Xiaoqiang~Zheng}, + year={2015}, + }
\ No newline at end of file diff -r 21d3e08b1a48 -r 82b6104d4682 main_macros.xml --- a/main_macros.xml Tue Jul 09 19:26:54 2019 -0400 +++ b/main_macros.xml Fri Aug 09 07:12:16 2019 -0400 @@ -1,16 +1,12 @@ - 1.0.0.4 + 1.0.7.10 + 0.2.0 python - scikit-learn - pandas - xgboost - asteval - skrebate - imbalanced-learn - mlxtend + Galaxy-ML <!-- [the XML markup of the following main_macros.xml hunks was lost in extraction; surviving hunk headers and text nodes are kept] --> @@ -420,8 +416,7 @@ @@ -429,6 +424,8 @@ @@ -436,6 +433,36 @@ @@ -705,7 +732,6 @@ @@ -731,25 +757,6 @@ help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/>
@@ -922,9 +929,9 @@ @@ -932,7 +939,7 @@ @@ -953,6 +960,40 @@ @@ -1167,7 +1208,7 @@ @@ -1286,14 +1327,13 @@ @@ -1310,6 +1350,30 @@ @@ -1750,6 +1814,40 @@ @@ -1847,7 +1945,7 @@ @article{JMLR:v18:16-365, author = {Guillaume Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas}, title = {Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning}, journal = {Journal of Machine Learning Research}, year = {2017}, volume = {18}, number = {17}, pages = {1-5}, url = {http://jmlr.org/papers/v18/16-365} } @@ -1862,4 +1960,19 @@ + @article{chen2019selene, + title={Selene: a PyTorch-based deep learning library for sequence data}, + author={Chen, Kathleen M and Cofer, Evan M and Zhou, Jian and Troyanskaya, Olga G}, + journal={Nature methods}, + volume={16}, + number={4}, + pages={315}, + year={2019}, + publisher={Nature Publishing Group} + }
diff -r 21d3e08b1a48 -r 82b6104d4682 model_prediction.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/model_prediction.py Fri Aug 09 07:12:16 2019 -0400 @@ -0,0 +1,205 @@ +import argparse +import json +import numpy as np +import pandas as pd +import warnings + +from scipy.io import mmread +from sklearn.pipeline import Pipeline + +from galaxy_ml.utils import (load_model, read_columns, + get_module, try_get_attr) + + +N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) + + +def main(inputs, infile_estimator, outfile_predict, + infile_weights=None, infile1=None, + fasta_path=None, ref_seq=None, + vcf_path=None): + """ + Parameters + ---------- + inputs : str + File path to galaxy tool parameter + + infile_estimator : str + File path to trained estimator input + + outfile_predict : str + File path to save the prediction results, tabular + + infile_weights : str + File path to weights input + + infile1 : str + File path to dataset containing features + + fasta_path : str + File path to dataset containing fasta file + + ref_seq : str + File path to dataset containing the reference genome sequence. + + vcf_path : str + File path to dataset containing variants info. + """ + warnings.filterwarnings('ignore') + + with open(inputs, 'r') as param_handler: + params = json.load(param_handler) + + # load model + with open(infile_estimator, 'rb') as est_handler: + estimator = load_model(est_handler) + + main_est = estimator + if isinstance(estimator, Pipeline): + main_est = estimator.steps[-1][-1] + if hasattr(main_est, 'config') and hasattr(main_est, 'load_weights'): + if not infile_weights or infile_weights == 'None': + raise ValueError("The selected model skeleton asks for weights, " + "but the dataset for weights was not selected!") + main_est.load_weights(infile_weights) + + # handle data input + input_type = params['input_options']['selected_input'] + # tabular input + if input_type == 'tabular': + header = 'infer' if params['input_options']['header1'] else None + column_option = (params['input_options'] + ['column_selector_options_1'] + ['selected_column_selector_option']) + if column_option in ['by_index_number', 'all_but_by_index_number', + 'by_header_name', 'all_but_by_header_name']: + c = params['input_options']['column_selector_options_1']['col1'] + else: + c = None + + df = pd.read_csv(infile1, sep='\t', header=header, parse_dates=True) + + X = read_columns(df, c=c, c_option=column_option).astype(float) + + if params['method'] == 'predict': + preds = estimator.predict(X) + else: + preds = estimator.predict_proba(X) + + # sparse input + elif input_type == 'sparse': + X = mmread(open(infile1, 'r')) + if params['method'] == 'predict': + preds = estimator.predict(X) + else: + preds = estimator.predict_proba(X) + + # fasta input + elif input_type == 'seq_fasta': + if not hasattr(estimator, 'data_batch_generator'): + raise ValueError( + "To do prediction on sequences in fasta input, " + "the estimator must be a `KerasGBatchClassifier` " + "equipped with data_batch_generator!") + pyfaidx = get_module('pyfaidx') + sequences = pyfaidx.Fasta(fasta_path) + n_seqs = len(sequences.keys()) + X = np.arange(n_seqs)[:, np.newaxis] + seq_length = estimator.data_batch_generator.seq_length + batch_size = getattr(estimator, 'batch_size', 32) + steps = (n_seqs + batch_size - 1) // batch_size + + seq_type = params['input_options']['seq_type'] + klass = try_get_attr( + 'galaxy_ml.preprocessors', seq_type) + + pred_data_generator = klass( + fasta_path, seq_length=seq_length) + + if params['method'] == 
'predict': + preds = estimator.predict( + X, data_generator=pred_data_generator, steps=steps) + else: + preds = estimator.predict_proba( + X, data_generator=pred_data_generator, steps=steps) + + # vcf input + elif input_type == 'variant_effect': + klass = try_get_attr('galaxy_ml.preprocessors', + 'GenomicVariantBatchGenerator') + + options = params['input_options'] + options.pop('selected_input') + if options['blacklist_regions'] == 'none': + options['blacklist_regions'] = None + + pred_data_generator = klass( + ref_genome_path=ref_seq, vcf_path=vcf_path, **options) + + pred_data_generator.fit() + + preds = estimator.model_.predict_generator( + pred_data_generator.flow(batch_size=32), + workers=N_JOBS, + use_multiprocessing=True) + + if preds.min() < 0. or preds.max() > 1.: + warnings.warn('Network returning invalid probability values. ' + 'The last layer might not normalize predictions ' + 'into probabilities ' + '(like softmax or sigmoid would).') + + if params['method'] == 'predict_proba' and preds.shape[1] == 1: + # first column is probability of class 0 and second is of class 1 + preds = np.hstack([1 - preds, preds]) + + elif params['method'] == 'predict': + if preds.shape[-1] > 1: + # if the last activation is `softmax`, the sum of all + # probabilities will be 1 and the classification is considered + # a multi-class problem; otherwise, we take it as multi-label. + act = getattr(estimator.model_.layers[-1], 'activation', None) + if act and act.__name__ == 'softmax': + classes = preds.argmax(axis=-1) + preds = estimator.classes_[classes] + else: + # multi-label: keep the thresholded 0/1 matrix as output + preds = (preds > 0.5).astype('int32') + else: + classes = (preds > 0.5).astype('int32') + preds = estimator.classes_[classes] + # end input + + # output + if input_type == 'variant_effect': # TODO: save in batches + rval = pd.DataFrame(preds) + meta = pd.DataFrame( + pred_data_generator.variants, + columns=['chrom', 'pos', 'name', 'ref', 'alt', 'strand']) + + rval = pd.concat([meta, rval], axis=1) + + elif len(preds.shape) == 1: + rval = pd.DataFrame(preds, columns=['Predicted']) + else: + rval = pd.DataFrame(preds) + + rval.to_csv(outfile_predict, sep='\t', + header=True, index=False) + + +if __name__ == '__main__': + aparser = argparse.ArgumentParser() + aparser.add_argument("-i", "--inputs", dest="inputs", required=True) + aparser.add_argument("-e", "--infile_estimator", dest="infile_estimator") + aparser.add_argument("-w", "--infile_weights", dest="infile_weights") + aparser.add_argument("-X", "--infile1", dest="infile1") + aparser.add_argument("-O", "--outfile_predict", dest="outfile_predict") + aparser.add_argument("-f", "--fasta_path", dest="fasta_path") + aparser.add_argument("-r", "--ref_seq", dest="ref_seq") + aparser.add_argument("-v", "--vcf_path", dest="vcf_path") + args = aparser.parse_args() + + main(args.inputs, args.infile_estimator, args.outfile_predict, + infile_weights=args.infile_weights, infile1=args.infile1, + fasta_path=args.fasta_path, ref_seq=args.ref_seq, + vcf_path=args.vcf_path) diff -r 21d3e08b1a48 -r 82b6104d4682 model_validations.py --- a/model_validations.py Tue Jul 09 19:26:54 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,252 +0,0 @@ -""" -class ------ -OrderedKFold -RepeatedOrderedKFold - - -function --------- -train_test_split -""" - -import numpy as np -import warnings - -from itertools import chain -from math import ceil, floor -from sklearn.model_selection import (GroupShuffleSplit, ShuffleSplit, - StratifiedShuffleSplit) -from sklearn.model_selection._split import _BaseKFold, _RepeatedSplits -from sklearn.utils import 
check_random_state, indexable, safe_indexing -from sklearn.utils.validation import _num_samples, check_array - - -def _validate_shuffle_split(n_samples, test_size, train_size, - default_test_size=None): - """ - Validation helper to check if the test/train sizes are meaningful with - respect to the size of the data (n_samples) - """ - if test_size is None and train_size is None: - test_size = default_test_size - - test_size_type = np.asarray(test_size).dtype.kind - train_size_type = np.asarray(train_size).dtype.kind - - if (test_size_type == 'i' and (test_size >= n_samples or test_size <= 0) - or test_size_type == 'f' and (test_size <= 0 or test_size >= 1)): - raise ValueError('test_size={0} should be either positive and smaller' - ' than the number of samples {1} or a float in the ' - '(0, 1) range'.format(test_size, n_samples)) - - if (train_size_type == 'i' and (train_size >= n_samples or train_size <= 0) - or train_size_type == 'f' and (train_size <= 0 or train_size >= 1)): - raise ValueError('train_size={0} should be either positive and smaller' - ' than the number of samples {1} or a float in the ' - '(0, 1) range'.format(train_size, n_samples)) - - if train_size is not None and train_size_type not in ('i', 'f'): - raise ValueError("Invalid value for train_size: {}".format(train_size)) - if test_size is not None and test_size_type not in ('i', 'f'): - raise ValueError("Invalid value for test_size: {}".format(test_size)) - - if (train_size_type == 'f' and test_size_type == 'f' and - train_size + test_size > 1): - raise ValueError( - 'The sum of test_size and train_size = {}, should be in the (0, 1)' - ' range. Reduce test_size and/or train_size.' - .format(train_size + test_size)) - - if test_size_type == 'f': - n_test = ceil(test_size * n_samples) - elif test_size_type == 'i': - n_test = float(test_size) - - if train_size_type == 'f': - n_train = floor(train_size * n_samples) - elif train_size_type == 'i': - n_train = float(train_size) - - if train_size is None: - n_train = n_samples - n_test - elif test_size is None: - n_test = n_samples - n_train - - if n_train + n_test > n_samples: - raise ValueError('The sum of train_size and test_size = %d, ' - 'should be smaller than the number of ' - 'samples %d. Reduce test_size and/or ' - 'train_size.' % (n_train + n_test, n_samples)) - - n_train, n_test = int(n_train), int(n_test) - - if n_train == 0: - raise ValueError( - 'With n_samples={}, test_size={} and train_size={}, the ' - 'resulting train set will be empty. Adjust any of the ' - 'aforementioned parameters.'.format(n_samples, test_size, - train_size) - ) - - return n_train, n_test - - -def train_test_split(*arrays, **options): - """Extend sklearn.model_selection.train_test_split to have group split. - - Parameters - ---------- - *arrays : sequence of indexables with same length / shape[0] - Allowed inputs are lists, numpy arrays, scipy-sparse - matrices or pandas dataframes. - - test_size : float, int or None, optional (default=None) - If float, should be between 0.0 and 1.0 and represent the proportion - of the dataset to include in the test split. If int, represents the - absolute number of test samples. If None, the value is set to the - complement of the train size. If ``train_size`` is also None, it will - be set to 0.25. - - train_size : float, int, or None, (default=None) - If float, should be between 0.0 and 1.0 and represent the - proportion of the dataset to include in the train split. If - int, represents the absolute number of train samples. 
If None, - the value is automatically set to the complement of the test size. - - random_state : int, RandomState instance or None, optional (default=None) - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. - - shuffle : None or str (default='simple') - How to shuffle the data before splitting. - None, no shuffle. - For str, one of 'simple', 'stratified' and 'group', corresponding to - `ShuffleSplit`, `StratifiedShuffleSplit` and `GroupShuffleSplit`, - respectively. - - labels : array-like or None (default=None) - Ignored if shuffle is None or 'simple'. - When shuffle='stratified', this array is used as class labels. - When shuffle='group', this array is used as groups. - - Returns - ------- - splitting : list, length=2 * len(arrays) - List containing train-test split of inputs. - - """ - n_arrays = len(arrays) - if n_arrays == 0: - raise ValueError("At least one array required as input") - test_size = options.pop('test_size', None) - train_size = options.pop('train_size', None) - random_state = options.pop('random_state', None) - shuffle = options.pop('shuffle', 'simple') - labels = options.pop('labels', None) - - if options: - raise TypeError("Invalid parameters passed: %s" % str(options)) - - arrays = indexable(*arrays) - - n_samples = _num_samples(arrays[0]) - if shuffle == 'group': - if labels is None: - raise ValueError("When shuffle='group', " - "labels should not be None!") - labels = check_array(labels, ensure_2d=False, dtype=None) - uniques = np.unique(labels) - n_samples = uniques.size - - n_train, n_test = _validate_shuffle_split(n_samples, test_size, train_size, - default_test_size=0.25) - - shuffle_options = dict(test_size=n_test, - train_size=n_train, - random_state=random_state) - - if shuffle is None: - if labels is not None: - warnings.warn("`labels` is ignored when " - "shuffle is None!") - - train = np.arange(n_train) - test = np.arange(n_train, n_train + n_test) - - elif shuffle == 'simple': - if labels is not None: - warnings.warn("`labels` is not needed and is therefore " - "ignored for ShuffleSplit, as shuffle='simple'!") - - cv = ShuffleSplit(**shuffle_options) - train, test = next(cv.split(X=arrays[0], y=None)) - - elif shuffle == 'stratified': - cv = StratifiedShuffleSplit(**shuffle_options) - train, test = next(cv.split(X=arrays[0], y=labels)) - - elif shuffle == 'group': - cv = GroupShuffleSplit(**shuffle_options) - train, test = next(cv.split(X=arrays[0], y=None, groups=labels)) - - else: - raise ValueError("The argument `shuffle` only supports None, " - "'simple', 'stratified' and 'group', but got `%s`!" - % shuffle) - - return list(chain.from_iterable((safe_indexing(a, train), - safe_indexing(a, test)) for a in arrays)) - - -class OrderedKFold(_BaseKFold): - """ - Split into K folds based on ordered target values - - Parameters - ---------- - n_splits : int, default=3 - Number of folds. Must be at least 2. 
-
-
-class OrderedKFold(_BaseKFold):
-    """
-    Split into K folds based on ordered target value.
-
-    Parameters
-    ----------
-    n_splits : int, default=3
-        Number of folds. Must be at least 2.
-
-    shuffle : bool
-    random_state : None or int
-    """
-
-    def __init__(self, n_splits=3, shuffle=False, random_state=None):
-        super(OrderedKFold, self).__init__(n_splits, shuffle, random_state)
-
-    def _iter_test_indices(self, X, y, groups=None):
-        n_samples = _num_samples(X)
-        n_splits = self.n_splits
-        y = np.asarray(y)
-        sorted_index = np.argsort(y)
-        if self.shuffle:
-            current = 0
-            rng = check_random_state(self.random_state)
-            for i in range(n_samples // n_splits):
-                start, stop = current, current + n_splits
-                rng.shuffle(sorted_index[start:stop])
-                current = stop
-            rng.shuffle(sorted_index[current:])
-
-        for i in range(n_splits):
-            yield sorted_index[i:n_samples:n_splits]
-
-
-class RepeatedOrderedKFold(_RepeatedSplits):
-    """Repeated OrderedKFold; runs OrderedKFold multiple times with
-    different randomization in each repetition.
-
-    Parameters
-    ----------
-    n_splits : int, default=5
-        Number of folds. Must be at least 2.
-
-    n_repeats : int, default=5
-        Number of times the cross-validator is repeated.
-
-    random_state : int, RandomState instance or None, optional
-    """
-    def __init__(self, n_splits=5, n_repeats=5, random_state=None):
-        super(RepeatedOrderedKFold, self).__init__(
-            OrderedKFold, n_repeats, random_state, n_splits=n_splits)
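-
-# Illustrative sketch (not part of the original file): OrderedKFold deals
-# the target-sorted sample indices out round-robin, so each fold spans the
-# full range of y. For the made-up y below, the three test folds printed
-# are [1 4], [2 3] and [0 5]:
-#
-#     import numpy as np
-#     X = np.zeros((6, 1))
-#     y = np.array([3, 1, 2, 5, 4, 6])
-#     for train, test in OrderedKFold(n_splits=3).split(X, y):
-#         print(test)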
"sklearn.cluster.bicluster._scale_normalize", - "sklearn.cluster.birch.Birch", "sklearn.cluster.birch._CFNode", - "sklearn.cluster.birch._CFSubcluster", "sklearn.cluster.birch._iterate_sparse_X", - "sklearn.cluster.birch._split_node", "sklearn.cluster.dbscan", - "sklearn.cluster.dbscan_.DBSCAN", "sklearn.cluster.dbscan_.dbscan", - "sklearn.cluster.estimate_bandwidth", "sklearn.cluster.get_bin_seeds", - "sklearn.cluster.hierarchical.AgglomerativeClustering", "sklearn.cluster.hierarchical.FeatureAgglomeration", - "sklearn.cluster.hierarchical._TREE_BUILDERS", "sklearn.cluster.hierarchical._average_linkage", - "sklearn.cluster.hierarchical._complete_linkage", "sklearn.cluster.hierarchical._fix_connectivity", - "sklearn.cluster.hierarchical._hc_cut", "sklearn.cluster.hierarchical.linkage_tree", - "sklearn.cluster.hierarchical.ward_tree", "sklearn.cluster.k_means", - "sklearn.cluster.k_means_.FLOAT_DTYPES", "sklearn.cluster.k_means_.KMeans", - "sklearn.cluster.k_means_.MiniBatchKMeans", "sklearn.cluster.k_means_._init_centroids", - "sklearn.cluster.k_means_._k_init", "sklearn.cluster.k_means_._kmeans_single_elkan", - "sklearn.cluster.k_means_._kmeans_single_lloyd", "sklearn.cluster.k_means_._labels_inertia", - "sklearn.cluster.k_means_._labels_inertia_precompute_dense", "sklearn.cluster.k_means_._mini_batch_convergence", - "sklearn.cluster.k_means_._mini_batch_step", "sklearn.cluster.k_means_._tolerance", - "sklearn.cluster.k_means_._validate_center_shape", "sklearn.cluster.k_means_.k_means", - "sklearn.cluster.k_means_.string_types", "sklearn.cluster.linkage_tree", - "sklearn.cluster.mean_shift", "sklearn.cluster.mean_shift_.MeanShift", - "sklearn.cluster.mean_shift_._mean_shift_single_seed", "sklearn.cluster.mean_shift_.estimate_bandwidth", - "sklearn.cluster.mean_shift_.get_bin_seeds", "sklearn.cluster.mean_shift_.mean_shift", - "sklearn.cluster.spectral.SpectralClustering", "sklearn.cluster.spectral.discretize", - "sklearn.cluster.spectral.spectral_clustering", "sklearn.cluster.spectral_clustering", - "sklearn.cluster.ward_tree", "sklearn.config_context", "sklearn.compose.TransformedTargetRegressor", - "sklearn.compose._target.TransformedTargetRegressor", "sklearn.compose.ColumnTransformer", - "sklearn.compose._column_transformer.ColumnTransformer", "sklearn.compose.make_column_transformer", - "sklearn.compose._column_transformer.make_column_transformer", - "sklearn.covariance.EllipticEnvelope", "sklearn.covariance.EmpiricalCovariance", - "sklearn.covariance.GraphLasso", "sklearn.covariance.GraphLassoCV", - "sklearn.covariance.LedoitWolf", "sklearn.covariance.MinCovDet", - "sklearn.covariance.OAS", "sklearn.covariance.ShrunkCovariance", - "sklearn.covariance.empirical_covariance", "sklearn.covariance.empirical_covariance_.EmpiricalCovariance", - "sklearn.covariance.empirical_covariance_.empirical_covariance", "sklearn.covariance.empirical_covariance_.log_likelihood", - "sklearn.covariance.fast_mcd", "sklearn.covariance.graph_lasso", - "sklearn.covariance.graph_lasso_.GraphLasso", "sklearn.covariance.graph_lasso_.GraphLassoCV", - "sklearn.covariance.graph_lasso_._dual_gap", "sklearn.covariance.graph_lasso_._objective", - "sklearn.covariance.graph_lasso_.alpha_max", "sklearn.covariance.graph_lasso_.graph_lasso", - "sklearn.covariance.graph_lasso_.graph_lasso_path", "sklearn.covariance.ledoit_wolf", - "sklearn.covariance.ledoit_wolf_shrinkage", "sklearn.covariance.log_likelihood", - "sklearn.covariance.oas", "sklearn.covariance.outlier_detection.EllipticEnvelope", - 
"sklearn.covariance.robust_covariance.MinCovDet", "sklearn.covariance.robust_covariance._c_step", - "sklearn.covariance.robust_covariance.c_step", "sklearn.covariance.robust_covariance.fast_mcd", - "sklearn.covariance.robust_covariance.select_candidates", "sklearn.covariance.shrunk_covariance", - "sklearn.covariance.shrunk_covariance_.LedoitWolf", "sklearn.covariance.shrunk_covariance_.OAS", - "sklearn.covariance.shrunk_covariance_.ShrunkCovariance", "sklearn.covariance.shrunk_covariance_.ledoit_wolf", - "sklearn.covariance.shrunk_covariance_.ledoit_wolf_shrinkage", "sklearn.covariance.shrunk_covariance_.oas", - "sklearn.covariance.shrunk_covariance_.shrunk_covariance", "sklearn.decomposition.DictionaryLearning", - "sklearn.decomposition.FactorAnalysis", "sklearn.decomposition.FastICA", - "sklearn.decomposition.IncrementalPCA", "sklearn.decomposition.KernelPCA", - "sklearn.decomposition.LatentDirichletAllocation", "sklearn.decomposition.MiniBatchDictionaryLearning", - "sklearn.decomposition.MiniBatchSparsePCA", "sklearn.decomposition.NMF", - "sklearn.decomposition.PCA", "sklearn.decomposition.RandomizedPCA", - "sklearn.decomposition.SparseCoder", "sklearn.decomposition.SparsePCA", - "sklearn.decomposition.TruncatedSVD", "sklearn.decomposition._online_lda._dirichlet_expectation_1d", - "sklearn.decomposition._online_lda._dirichlet_expectation_2d", "sklearn.decomposition._online_lda.mean_change", - "sklearn.decomposition.base._BasePCA", "sklearn.decomposition.cdnmf_fast._update_cdnmf_fast", - "sklearn.decomposition.dict_learning", "sklearn.decomposition.dict_learning_online", - "sklearn.decomposition.factor_analysis.FactorAnalysis", "sklearn.decomposition.fastica", - "sklearn.decomposition.fastica_.FLOAT_DTYPES", "sklearn.decomposition.fastica_.FastICA", - "sklearn.decomposition.fastica_._cube", "sklearn.decomposition.fastica_._exp", - "sklearn.decomposition.fastica_._gs_decorrelation", "sklearn.decomposition.fastica_._ica_def", - "sklearn.decomposition.fastica_._ica_par", "sklearn.decomposition.fastica_._logcosh", - "sklearn.decomposition.fastica_._sym_decorrelation", "sklearn.decomposition.fastica_.fastica", - "sklearn.decomposition.fastica_.string_types", "sklearn.decomposition.incremental_pca.IncrementalPCA", - "sklearn.decomposition.kernel_pca.KernelPCA", "sklearn.decomposition.nmf.EPSILON", - "sklearn.decomposition.nmf.INTEGER_TYPES", "sklearn.decomposition.nmf.NMF", - "sklearn.decomposition.nmf._beta_divergence", "sklearn.decomposition.nmf._beta_loss_to_float", - "sklearn.decomposition.nmf._check_init", "sklearn.decomposition.nmf._check_string_param", - "sklearn.decomposition.nmf._compute_regularization", "sklearn.decomposition.nmf._fit_coordinate_descent", - "sklearn.decomposition.nmf._fit_multiplicative_update", "sklearn.decomposition.nmf._initialize_nmf", - "sklearn.decomposition.nmf._multiplicative_update_h", "sklearn.decomposition.nmf._multiplicative_update_w", - "sklearn.decomposition.nmf._special_sparse_dot", "sklearn.decomposition.nmf._update_coordinate_descent", - "sklearn.decomposition.nmf.non_negative_factorization", "sklearn.decomposition.nmf.norm", - "sklearn.decomposition.nmf.trace_dot", "sklearn.decomposition.non_negative_factorization", - "sklearn.decomposition.online_lda.EPS", "sklearn.decomposition.online_lda.LatentDirichletAllocation", - "sklearn.decomposition.online_lda._update_doc_distribution", "sklearn.decomposition.online_lda.gammaln", - "sklearn.decomposition.pca.PCA", "sklearn.decomposition.pca.RandomizedPCA", - "sklearn.decomposition.pca._assess_dimension_", 
"sklearn.decomposition.pca._infer_dimension_", - "sklearn.decomposition.pca.gammaln", "sklearn.decomposition.sparse_encode", - "sklearn.decomposition.sparse_pca.MiniBatchSparsePCA", "sklearn.decomposition.sparse_pca.SparsePCA", - "sklearn.decomposition.truncated_svd.TruncatedSVD", "sklearn.discriminant_analysis.LinearDiscriminantAnalysis", - "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis", "sklearn.discriminant_analysis._class_cov", - "sklearn.discriminant_analysis._class_means", "sklearn.discriminant_analysis._cov", - "sklearn.discriminant_analysis.string_types", "sklearn.ensemble.AdaBoostClassifier", - "sklearn.ensemble.AdaBoostRegressor", "sklearn.ensemble.BaggingClassifier", - "sklearn.ensemble.BaggingRegressor", "sklearn.ensemble.BaseEnsemble", - "sklearn.ensemble.ExtraTreesClassifier", "sklearn.ensemble.ExtraTreesRegressor", - "sklearn.ensemble.GradientBoostingClassifier", "sklearn.ensemble.GradientBoostingRegressor", - "sklearn.ensemble.IsolationForest", "sklearn.ensemble.RandomForestClassifier", - "sklearn.ensemble.RandomForestRegressor", "sklearn.ensemble.RandomTreesEmbedding", - "sklearn.ensemble.VotingClassifier", "sklearn.ensemble._gradient_boosting._partial_dependence_tree", - "sklearn.ensemble._gradient_boosting._predict_regression_tree_stages_sparse", "sklearn.ensemble._gradient_boosting._random_sample_mask", - "sklearn.ensemble._gradient_boosting.predict_stage", "sklearn.ensemble._gradient_boosting.predict_stages", - "sklearn.ensemble.bagging.BaggingClassifier", "sklearn.ensemble.bagging.BaggingRegressor", - "sklearn.ensemble.bagging.BaseBagging", "sklearn.ensemble.bagging.MAX_INT", - "sklearn.ensemble.bagging._generate_bagging_indices", "sklearn.ensemble.bagging._generate_indices", - "sklearn.ensemble.bagging._parallel_build_estimators", "sklearn.ensemble.bagging._parallel_decision_function", - "sklearn.ensemble.bagging._parallel_predict_log_proba", "sklearn.ensemble.bagging._parallel_predict_proba", - "sklearn.ensemble.bagging._parallel_predict_regression", "sklearn.ensemble.base.BaseEnsemble", - "sklearn.ensemble.base.MAX_RAND_SEED", "sklearn.ensemble.base._partition_estimators", - "sklearn.ensemble.base._set_random_states", "sklearn.ensemble.forest.BaseForest", - "sklearn.ensemble.forest.ExtraTreesClassifier", "sklearn.ensemble.forest.ExtraTreesRegressor", - "sklearn.ensemble.forest.ForestClassifier", "sklearn.ensemble.forest.ForestRegressor", - "sklearn.ensemble.forest.MAX_INT", "sklearn.ensemble.forest.RandomForestClassifier", - "sklearn.ensemble.forest.RandomForestRegressor", "sklearn.ensemble.forest.RandomTreesEmbedding", - "sklearn.ensemble.forest._generate_sample_indices", "sklearn.ensemble.forest._generate_unsampled_indices", - "sklearn.ensemble.forest._parallel_build_trees", "sklearn.ensemble.forest.accumulate_prediction", - "sklearn.ensemble.gradient_boosting.BaseGradientBoosting", "sklearn.ensemble.gradient_boosting.BinomialDeviance", - "sklearn.ensemble.gradient_boosting.ClassificationLossFunction", "sklearn.ensemble.gradient_boosting.ExponentialLoss", - "sklearn.ensemble.gradient_boosting.GradientBoostingClassifier", "sklearn.ensemble.gradient_boosting.GradientBoostingRegressor", - "sklearn.ensemble.gradient_boosting.HuberLossFunction", "sklearn.ensemble.gradient_boosting.INIT_ESTIMATORS", - "sklearn.ensemble.gradient_boosting.LOSS_FUNCTIONS", "sklearn.ensemble.gradient_boosting.LeastAbsoluteError", - "sklearn.ensemble.gradient_boosting.LeastSquaresError", "sklearn.ensemble.gradient_boosting.LogOddsEstimator", - 
"sklearn.ensemble.gradient_boosting.LossFunction", "sklearn.ensemble.gradient_boosting.MeanEstimator", - "sklearn.ensemble.gradient_boosting.MultinomialDeviance", "sklearn.ensemble.gradient_boosting.PriorProbabilityEstimator", - "sklearn.ensemble.gradient_boosting.QuantileEstimator", "sklearn.ensemble.gradient_boosting.QuantileLossFunction", - "sklearn.ensemble.gradient_boosting.RegressionLossFunction", "sklearn.ensemble.gradient_boosting.ScaledLogOddsEstimator", - "sklearn.ensemble.gradient_boosting.TREE_LEAF", "sklearn.ensemble.gradient_boosting.VerboseReporter", - "sklearn.ensemble.gradient_boosting.ZeroEstimator", "sklearn.ensemble.gradient_boosting.expit", - "sklearn.ensemble.iforest.INTEGER_TYPES", "sklearn.ensemble.iforest.IsolationForest", - "sklearn.ensemble.iforest._average_path_length", "sklearn.ensemble.iforest.euler_gamma", - "sklearn.ensemble.partial_dependence._grid_from_X", "sklearn.ensemble.partial_dependence.partial_dependence", - "sklearn.ensemble.partial_dependence.plot_partial_dependence", "sklearn.ensemble.voting_classifier.VotingClassifier", - "sklearn.ensemble.voting_classifier._parallel_fit_estimator", "sklearn.ensemble.weight_boosting.AdaBoostClassifier", - "sklearn.ensemble.weight_boosting.AdaBoostRegressor", "sklearn.ensemble.weight_boosting.BaseWeightBoosting", - "sklearn.ensemble.weight_boosting._samme_proba", "sklearn.ensemble.weight_boosting.inner1d", - "sklearn.feature_extraction.DictVectorizer", "sklearn.feature_extraction.FeatureHasher", - "sklearn.feature_extraction._hashing.transform", "sklearn.feature_extraction.dict_vectorizer.DictVectorizer", - "sklearn.feature_extraction.dict_vectorizer._tosequence", "sklearn.feature_extraction.grid_to_graph", - "sklearn.feature_extraction.hashing.FeatureHasher", "sklearn.feature_extraction.hashing._iteritems", - "sklearn.feature_extraction.image.PatchExtractor", "sklearn.feature_extraction.image._compute_gradient_3d", - "sklearn.feature_extraction.image._compute_n_patches", "sklearn.feature_extraction.image._make_edges_3d", - "sklearn.feature_extraction.image._mask_edges_weights", "sklearn.feature_extraction.image._to_graph", - "sklearn.feature_extraction.image.extract_patches", "sklearn.feature_extraction.image.extract_patches_2d", - "sklearn.feature_extraction.image.grid_to_graph", "sklearn.feature_extraction.image.img_to_graph", - "sklearn.feature_extraction.image.reconstruct_from_patches_2d", "sklearn.feature_extraction.img_to_graph", - "sklearn.feature_extraction.stop_words.ENGLISH_STOP_WORDS", "sklearn.feature_extraction.text.CountVectorizer", - "sklearn.feature_extraction.text.ENGLISH_STOP_WORDS", "sklearn.feature_extraction.text.HashingVectorizer", - "sklearn.feature_extraction.text.TfidfTransformer", "sklearn.feature_extraction.text.TfidfVectorizer", - "sklearn.feature_extraction.text.VectorizerMixin", "sklearn.feature_extraction.text._check_stop_list", - "sklearn.feature_extraction.text._document_frequency", "sklearn.feature_extraction.text._make_int_array", - "sklearn.feature_extraction.text.strip_accents_ascii", "sklearn.feature_extraction.text.strip_accents_unicode", - "sklearn.feature_extraction.text.strip_tags", "sklearn.feature_selection.GenericUnivariateSelect", - "sklearn.feature_selection.RFE", "sklearn.feature_selection.RFECV", - "sklearn.feature_selection.SelectFdr", "sklearn.feature_selection.SelectFpr", - "sklearn.feature_selection.SelectFromModel", "sklearn.feature_selection.SelectFwe", - "sklearn.feature_selection.SelectKBest", "sklearn.feature_selection.SelectPercentile", - 
"sklearn.feature_selection.VarianceThreshold", "sklearn.feature_selection.base.SelectorMixin", - "sklearn.feature_selection.chi2", "sklearn.feature_selection.f_classif", - "sklearn.feature_selection.f_oneway", "sklearn.feature_selection.f_regression", - "sklearn.feature_selection.from_model.SelectFromModel", "sklearn.feature_selection.from_model._calculate_threshold", - "sklearn.feature_selection.from_model._get_feature_importances", "sklearn.feature_selection.mutual_info_._compute_mi", - "sklearn.feature_selection.mutual_info_._compute_mi_cc", "sklearn.feature_selection.mutual_info_._compute_mi_cd", - "sklearn.feature_selection.mutual_info_._estimate_mi", "sklearn.feature_selection.mutual_info_._iterate_columns", - "sklearn.feature_selection.mutual_info_.digamma", "sklearn.feature_selection.mutual_info_.mutual_info_classif", - "sklearn.feature_selection.mutual_info_.mutual_info_regression", "sklearn.feature_selection.mutual_info_classif", - "sklearn.feature_selection.mutual_info_regression", "sklearn.feature_selection.rfe.RFE", - "sklearn.feature_selection.rfe.RFECV", "sklearn.feature_selection.rfe._rfe_single_fit", - "sklearn.feature_selection.univariate_selection.GenericUnivariateSelect", "sklearn.feature_selection.univariate_selection.SelectFdr", - "sklearn.feature_selection.univariate_selection.SelectFpr", "sklearn.feature_selection.univariate_selection.SelectFwe", - "sklearn.feature_selection.univariate_selection.SelectKBest", "sklearn.feature_selection.univariate_selection.SelectPercentile", - "sklearn.feature_selection.univariate_selection._BaseFilter", "sklearn.feature_selection.univariate_selection._chisquare", - "sklearn.feature_selection.univariate_selection._clean_nans", "sklearn.feature_selection.univariate_selection.chi2", - "sklearn.feature_selection.univariate_selection.f_classif", "sklearn.feature_selection.univariate_selection.f_oneway", - "sklearn.feature_selection.univariate_selection.f_regression", "sklearn.feature_selection.variance_threshold.VarianceThreshold", - "sklearn.gaussian_process.GaussianProcess", "sklearn.gaussian_process.GaussianProcessClassifier", - "sklearn.gaussian_process.GaussianProcessRegressor", "sklearn.gaussian_process.correlation_models.absolute_exponential", - "sklearn.gaussian_process.correlation_models.cubic", "sklearn.gaussian_process.correlation_models.generalized_exponential", - "sklearn.gaussian_process.correlation_models.linear", "sklearn.gaussian_process.correlation_models.pure_nugget", - "sklearn.gaussian_process.correlation_models.squared_exponential", "sklearn.gaussian_process.gaussian_process.GaussianProcess", - "sklearn.gaussian_process.gaussian_process.MACHINE_EPSILON", "sklearn.gaussian_process.gaussian_process.l1_cross_distances", - "sklearn.gaussian_process.gpc.COEFS", "sklearn.gaussian_process.gpc.GaussianProcessClassifier", - "sklearn.gaussian_process.gpc.LAMBDAS", "sklearn.gaussian_process.gpc._BinaryGaussianProcessClassifierLaplace", - "sklearn.gaussian_process.gpc.erf", "sklearn.gaussian_process.gpc.expit", - "sklearn.gaussian_process.gpr.GaussianProcessRegressor", "sklearn.gaussian_process.kernels.CompoundKernel", - "sklearn.gaussian_process.kernels.ConstantKernel", "sklearn.gaussian_process.kernels.DotProduct", - "sklearn.gaussian_process.kernels.ExpSineSquared", "sklearn.gaussian_process.kernels.Exponentiation", - "sklearn.gaussian_process.kernels.Hyperparameter", "sklearn.gaussian_process.kernels.Kernel", - "sklearn.gaussian_process.kernels.KernelOperator", "sklearn.gaussian_process.kernels.Matern", - 
"sklearn.gaussian_process.kernels.NormalizedKernelMixin", "sklearn.gaussian_process.kernels.PairwiseKernel", - "sklearn.gaussian_process.kernels.Product", "sklearn.gaussian_process.kernels.RBF", - "sklearn.gaussian_process.kernels.RationalQuadratic", "sklearn.gaussian_process.kernels.StationaryKernelMixin", - "sklearn.gaussian_process.kernels.Sum", "sklearn.gaussian_process.kernels.WhiteKernel", - "sklearn.gaussian_process.kernels._approx_fprime", "sklearn.gaussian_process.kernels._check_length_scale", - "sklearn.gaussian_process.kernels.gamma", "sklearn.gaussian_process.kernels.kv", - "sklearn.gaussian_process.regression_models.constant", "sklearn.gaussian_process.regression_models.linear", - "sklearn.gaussian_process.regression_models.quadratic", "sklearn.get_config", - "sklearn.isotonic.IsotonicRegression", "sklearn.isotonic.check_increasing", - "sklearn.isotonic.isotonic_regression", "sklearn.kernel_approximation.AdditiveChi2Sampler", - "sklearn.kernel_approximation.KERNEL_PARAMS", "sklearn.kernel_approximation.Nystroem", - "sklearn.kernel_approximation.RBFSampler", "sklearn.kernel_approximation.SkewedChi2Sampler", - "sklearn.kernel_ridge.KernelRidge", "sklearn.linear_model.ARDRegression", - "sklearn.linear_model.BayesianRidge", "sklearn.linear_model.ElasticNet", - "sklearn.linear_model.ElasticNetCV", "sklearn.linear_model.Hinge", - "sklearn.linear_model.Huber", "sklearn.linear_model.HuberRegressor", - "sklearn.linear_model.Lars", "sklearn.linear_model.LarsCV", - "sklearn.linear_model.Lasso", "sklearn.linear_model.LassoCV", - "sklearn.linear_model.LassoLars", "sklearn.linear_model.LassoLarsCV", - "sklearn.linear_model.LassoLarsIC", "sklearn.linear_model.LinearRegression", - "sklearn.linear_model.Log", "sklearn.linear_model.LogisticRegression", - "sklearn.linear_model.LogisticRegressionCV", "sklearn.linear_model.ModifiedHuber", - "sklearn.linear_model.MultiTaskElasticNet", "sklearn.linear_model.MultiTaskElasticNetCV", - "sklearn.linear_model.MultiTaskLasso", "sklearn.linear_model.MultiTaskLassoCV", - "sklearn.linear_model.OrthogonalMatchingPursuit", "sklearn.linear_model.OrthogonalMatchingPursuitCV", - "sklearn.linear_model.PassiveAggressiveClassifier", "sklearn.linear_model.PassiveAggressiveRegressor", - "sklearn.linear_model.Perceptron", "sklearn.linear_model.RANSACRegressor", - "sklearn.linear_model.RandomizedLasso", "sklearn.linear_model.RandomizedLogisticRegression", - "sklearn.linear_model.Ridge", "sklearn.linear_model.RidgeCV", - "sklearn.linear_model.RidgeClassifier", "sklearn.linear_model.RidgeClassifierCV", - "sklearn.linear_model.SGDClassifier", "sklearn.linear_model.SGDRegressor", - "sklearn.linear_model.SquaredLoss", "sklearn.linear_model.TheilSenRegressor", - "sklearn.linear_model.base.FLOAT_DTYPES", "sklearn.linear_model.base.LinearClassifierMixin", - "sklearn.linear_model.base.LinearModel", "sklearn.linear_model.base.LinearRegression", - "sklearn.linear_model.base.SPARSE_INTERCEPT_DECAY", "sklearn.linear_model.base.SparseCoefMixin", - "sklearn.linear_model.base._pre_fit", "sklearn.linear_model.base._preprocess_data", - "sklearn.linear_model.base._rescale_data", "sklearn.linear_model.base.center_data", - "sklearn.linear_model.base.make_dataset", "sklearn.linear_model.base.sparse_center_data", - "sklearn.linear_model.bayes.ARDRegression", "sklearn.linear_model.bayes.BayesianRidge", - "sklearn.linear_model.cd_fast.enet_coordinate_descent", "sklearn.linear_model.cd_fast.enet_coordinate_descent_gram", - "sklearn.linear_model.cd_fast.enet_coordinate_descent_multi_task", 
"sklearn.linear_model.cd_fast.sparse_enet_coordinate_descent", - "sklearn.linear_model.coordinate_descent.ElasticNet", "sklearn.linear_model.coordinate_descent.ElasticNetCV", - "sklearn.linear_model.coordinate_descent.Lasso", "sklearn.linear_model.coordinate_descent.LassoCV", - "sklearn.linear_model.coordinate_descent.LinearModelCV", "sklearn.linear_model.coordinate_descent.MultiTaskElasticNet", - "sklearn.linear_model.coordinate_descent.MultiTaskElasticNetCV", "sklearn.linear_model.coordinate_descent.MultiTaskLasso", - "sklearn.linear_model.coordinate_descent.MultiTaskLassoCV", "sklearn.linear_model.coordinate_descent._alpha_grid", - "sklearn.linear_model.coordinate_descent._path_residuals", "sklearn.linear_model.coordinate_descent.enet_path", - "sklearn.linear_model.coordinate_descent.lasso_path", "sklearn.linear_model.enet_path", - "sklearn.linear_model.huber.HuberRegressor", "sklearn.linear_model.huber._huber_loss_and_gradient", - "sklearn.linear_model.lars_path", "sklearn.linear_model.lasso_path", - "sklearn.linear_model.lasso_stability_path", "sklearn.linear_model.least_angle.Lars", - "sklearn.linear_model.least_angle.LarsCV", "sklearn.linear_model.least_angle.LassoLars", - "sklearn.linear_model.least_angle.LassoLarsCV", "sklearn.linear_model.least_angle.LassoLarsIC", - "sklearn.linear_model.least_angle._check_copy_and_writeable", "sklearn.linear_model.least_angle._lars_path_residues", - "sklearn.linear_model.least_angle.lars_path", "sklearn.linear_model.least_angle.solve_triangular_args", - "sklearn.linear_model.least_angle.string_types", "sklearn.linear_model.logistic.LogisticRegression", - "sklearn.linear_model.logistic.LogisticRegressionCV", "sklearn.linear_model.logistic.SCORERS", - "sklearn.linear_model.logistic._check_solver_option", "sklearn.linear_model.logistic._intercept_dot", - "sklearn.linear_model.logistic._log_reg_scoring_path", "sklearn.linear_model.logistic._logistic_grad_hess", - "sklearn.linear_model.logistic._logistic_loss", "sklearn.linear_model.logistic._logistic_loss_and_grad", - "sklearn.linear_model.logistic._multinomial_grad_hess", "sklearn.linear_model.logistic._multinomial_loss", - "sklearn.linear_model.logistic._multinomial_loss_grad", "sklearn.linear_model.logistic.expit", - "sklearn.linear_model.logistic.logistic_regression_path", "sklearn.linear_model.logistic_regression_path", - "sklearn.linear_model.omp.OrthogonalMatchingPursuit", "sklearn.linear_model.omp.OrthogonalMatchingPursuitCV", - "sklearn.linear_model.omp._cholesky_omp", "sklearn.linear_model.omp._gram_omp", - "sklearn.linear_model.omp._omp_path_residues", "sklearn.linear_model.omp.orthogonal_mp", - "sklearn.linear_model.omp.orthogonal_mp_gram", "sklearn.linear_model.omp.premature", - "sklearn.linear_model.omp.solve_triangular_args", "sklearn.linear_model.orthogonal_mp", - "sklearn.linear_model.orthogonal_mp_gram", "sklearn.linear_model.passive_aggressive.DEFAULT_EPSILON", - "sklearn.linear_model.passive_aggressive.PassiveAggressiveClassifier", "sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor", - "sklearn.linear_model.perceptron.Perceptron", "sklearn.linear_model.randomized_l1.BaseRandomizedLinearModel", - "sklearn.linear_model.randomized_l1.RandomizedLasso", "sklearn.linear_model.randomized_l1.RandomizedLogisticRegression", - "sklearn.linear_model.randomized_l1._lasso_stability_path", "sklearn.linear_model.randomized_l1._randomized_lasso", - "sklearn.linear_model.randomized_l1._randomized_logistic", "sklearn.linear_model.randomized_l1._resample_model", - 
"sklearn.linear_model.randomized_l1.lasso_stability_path", "sklearn.linear_model.ransac.RANSACRegressor", - "sklearn.linear_model.ransac._EPSILON", "sklearn.linear_model.ransac._dynamic_max_trials", - "sklearn.linear_model.ridge.Ridge", "sklearn.linear_model.ridge.RidgeCV", - "sklearn.linear_model.ridge.RidgeClassifier", "sklearn.linear_model.ridge.RidgeClassifierCV", - "sklearn.linear_model.ridge._BaseRidge", "sklearn.linear_model.ridge._BaseRidgeCV", - "sklearn.linear_model.ridge._RidgeGCV", "sklearn.linear_model.ridge._solve_cholesky", - "sklearn.linear_model.ridge._solve_cholesky_kernel", "sklearn.linear_model.ridge._solve_lsqr", - "sklearn.linear_model.ridge._solve_sparse_cg", "sklearn.linear_model.ridge._solve_svd", - "sklearn.linear_model.ridge.ridge_regression", "sklearn.linear_model.ridge_regression", - "sklearn.linear_model.sag.get_auto_step_size", "sklearn.linear_model.sag.sag", - "sklearn.linear_model.sag.sag_solver", "sklearn.linear_model.sag_fast.MultinomialLogLoss", - "sklearn.linear_model.sag_fast._multinomial_grad_loss_all_samples", "sklearn.linear_model.sag_fast.sag", - "sklearn.linear_model.sgd_fast.Classification", "sklearn.linear_model.sgd_fast.EpsilonInsensitive", - "sklearn.linear_model.sgd_fast.Hinge", "sklearn.linear_model.sgd_fast.Huber", - "sklearn.linear_model.sgd_fast.Log", "sklearn.linear_model.sgd_fast.LossFunction", - "sklearn.linear_model.sgd_fast.ModifiedHuber", "sklearn.linear_model.sgd_fast.Regression", - "sklearn.linear_model.sgd_fast.SquaredEpsilonInsensitive", "sklearn.linear_model.sgd_fast.SquaredHinge", - "sklearn.linear_model.sgd_fast.SquaredLoss", "sklearn.linear_model.sgd_fast._plain_sgd", - "sklearn.linear_model.sgd_fast.average_sgd", "sklearn.linear_model.sgd_fast.plain_sgd", - "sklearn.linear_model.stochastic_gradient.BaseSGD", "sklearn.linear_model.stochastic_gradient.BaseSGDClassifier", - "sklearn.linear_model.stochastic_gradient.BaseSGDRegressor", "sklearn.linear_model.stochastic_gradient.DEFAULT_EPSILON", - "sklearn.linear_model.stochastic_gradient.LEARNING_RATE_TYPES", "sklearn.linear_model.stochastic_gradient.PENALTY_TYPES", - "sklearn.linear_model.stochastic_gradient.SGDClassifier", "sklearn.linear_model.stochastic_gradient.SGDRegressor", - "sklearn.linear_model.stochastic_gradient._prepare_fit_binary", "sklearn.linear_model.stochastic_gradient.fit_binary", - "sklearn.linear_model.theil_sen.TheilSenRegressor", "sklearn.linear_model.theil_sen._EPSILON", - "sklearn.linear_model.theil_sen._breakdown_point", "sklearn.linear_model.theil_sen._lstsq", - "sklearn.linear_model.theil_sen._modified_weiszfeld_step", "sklearn.linear_model.theil_sen._spatial_median", - "sklearn.linear_model.theil_sen.binom", "sklearn.manifold.Isomap", - "sklearn.manifold.LocallyLinearEmbedding", "sklearn.manifold.MDS", - "sklearn.manifold.SpectralEmbedding", "sklearn.manifold.TSNE", - "sklearn.manifold._barnes_hut_tsne.gradient", "sklearn.manifold._utils._binary_search_perplexity", - "sklearn.manifold.isomap.Isomap", "sklearn.manifold.locally_linear.FLOAT_DTYPES", - "sklearn.manifold.locally_linear.LocallyLinearEmbedding", "sklearn.manifold.locally_linear.barycenter_kneighbors_graph", - "sklearn.manifold.locally_linear.barycenter_weights", "sklearn.manifold.locally_linear.locally_linear_embedding", - "sklearn.manifold.locally_linear.null_space", "sklearn.manifold.locally_linear_embedding", - "sklearn.manifold.mds.MDS", "sklearn.manifold.mds._smacof_single", - "sklearn.manifold.mds.smacof", "sklearn.manifold.smacof", - "sklearn.manifold.spectral_embedding", 
"sklearn.manifold.spectral_embedding_.SpectralEmbedding", - "sklearn.manifold.spectral_embedding_._graph_connected_component", "sklearn.manifold.spectral_embedding_._graph_is_connected", - "sklearn.manifold.spectral_embedding_._set_diag", "sklearn.manifold.spectral_embedding_.spectral_embedding", - "sklearn.manifold.t_sne.MACHINE_EPSILON", "sklearn.manifold.t_sne.TSNE", - "sklearn.manifold.t_sne._gradient_descent", "sklearn.manifold.t_sne._joint_probabilities", - "sklearn.manifold.t_sne._joint_probabilities_nn", "sklearn.manifold.t_sne._kl_divergence", - "sklearn.manifold.t_sne._kl_divergence_bh", "sklearn.manifold.t_sne.string_types", - "sklearn.manifold.t_sne.trustworthiness", "sklearn.metrics.SCORERS", - "sklearn.metrics.accuracy_score", "sklearn.metrics.adjusted_mutual_info_score", - "sklearn.metrics.adjusted_rand_score", "sklearn.metrics.auc", - "sklearn.metrics.average_precision_score", "sklearn.metrics.base._average_binary_score", - "sklearn.metrics.brier_score_loss", "sklearn.metrics.calinski_harabaz_score", - "sklearn.metrics.classification._check_binary_probabilistic_predictions", "sklearn.metrics.classification._check_targets", - "sklearn.metrics.classification._prf_divide", "sklearn.metrics.classification._weighted_sum", - "sklearn.metrics.classification.accuracy_score", "sklearn.metrics.classification.brier_score_loss", - "sklearn.metrics.classification.classification_report", "sklearn.metrics.classification.cohen_kappa_score", - "sklearn.metrics.classification.confusion_matrix", "sklearn.metrics.classification.f1_score", - "sklearn.metrics.classification.fbeta_score", "sklearn.metrics.classification.hamming_loss", - "sklearn.metrics.classification.hinge_loss", "sklearn.metrics.classification.jaccard_similarity_score", - "sklearn.metrics.classification.log_loss", "sklearn.metrics.classification.matthews_corrcoef", - "sklearn.metrics.classification.precision_recall_fscore_support", "sklearn.metrics.classification.precision_score", - "sklearn.metrics.classification.recall_score", "sklearn.metrics.classification.zero_one_loss", - "sklearn.metrics.classification_report", "sklearn.metrics.cluster.adjusted_mutual_info_score", - "sklearn.metrics.cluster.adjusted_rand_score", "sklearn.metrics.cluster.bicluster._check_rows_and_columns", - "sklearn.metrics.cluster.bicluster._jaccard", "sklearn.metrics.cluster.bicluster._pairwise_similarity", - "sklearn.metrics.cluster.bicluster.consensus_score", "sklearn.metrics.cluster.calinski_harabaz_score", - "sklearn.metrics.cluster.completeness_score", "sklearn.metrics.cluster.consensus_score", - "sklearn.metrics.cluster.contingency_matrix", "sklearn.metrics.cluster.entropy", - "sklearn.metrics.cluster.expected_mutual_info_fast.expected_mutual_information", "sklearn.metrics.cluster.expected_mutual_info_fast.gammaln", - "sklearn.metrics.cluster.expected_mutual_information", "sklearn.metrics.cluster.fowlkes_mallows_score", - "sklearn.metrics.cluster.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.homogeneity_score", - "sklearn.metrics.cluster.mutual_info_score", "sklearn.metrics.cluster.normalized_mutual_info_score", - "sklearn.metrics.cluster.silhouette_samples", "sklearn.metrics.cluster.silhouette_score", - "sklearn.metrics.cluster.supervised.adjusted_mutual_info_score", "sklearn.metrics.cluster.supervised.adjusted_rand_score", - "sklearn.metrics.cluster.supervised.check_clusterings", "sklearn.metrics.cluster.supervised.comb2", - "sklearn.metrics.cluster.supervised.completeness_score", 
"sklearn.metrics.cluster.supervised.contingency_matrix", - "sklearn.metrics.cluster.supervised.entropy", "sklearn.metrics.cluster.supervised.fowlkes_mallows_score", - "sklearn.metrics.cluster.supervised.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.supervised.homogeneity_score", - "sklearn.metrics.cluster.supervised.mutual_info_score", "sklearn.metrics.cluster.supervised.normalized_mutual_info_score", - "sklearn.metrics.cluster.supervised.v_measure_score", "sklearn.metrics.cluster.unsupervised.calinski_harabaz_score", - "sklearn.metrics.cluster.unsupervised.check_number_of_labels", "sklearn.metrics.cluster.unsupervised.silhouette_samples", - "sklearn.metrics.cluster.unsupervised.silhouette_score", "sklearn.metrics.cluster.v_measure_score", - "sklearn.metrics.cohen_kappa_score", "sklearn.metrics.completeness_score", - "sklearn.metrics.confusion_matrix", "sklearn.metrics.consensus_score", - "sklearn.metrics.coverage_error", "sklearn.metrics.euclidean_distances", - "sklearn.metrics.explained_variance_score", "sklearn.metrics.f1_score", - "sklearn.metrics.fbeta_score", "sklearn.metrics.fowlkes_mallows_score", - "sklearn.metrics.get_scorer", "sklearn.metrics.hamming_loss", - "sklearn.metrics.hinge_loss", "sklearn.metrics.homogeneity_completeness_v_measure", - "sklearn.metrics.homogeneity_score", "sklearn.metrics.jaccard_similarity_score", - "sklearn.metrics.label_ranking_average_precision_score", "sklearn.metrics.label_ranking_loss", - "sklearn.metrics.log_loss", "sklearn.metrics.make_scorer", - "sklearn.metrics.matthews_corrcoef", "sklearn.metrics.mean_absolute_error", - "sklearn.metrics.mean_squared_error", "sklearn.metrics.mean_squared_log_error", - "sklearn.metrics.median_absolute_error", "sklearn.metrics.mutual_info_score", - "sklearn.metrics.normalized_mutual_info_score", "sklearn.metrics.pairwise.KERNEL_PARAMS", - "sklearn.metrics.pairwise.PAIRED_DISTANCES", "sklearn.metrics.pairwise.PAIRWISE_BOOLEAN_FUNCTIONS", - "sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS", "sklearn.metrics.pairwise.PAIRWISE_KERNEL_FUNCTIONS", - "sklearn.metrics.pairwise._VALID_METRICS", "sklearn.metrics.pairwise._chi2_kernel_fast", - "sklearn.metrics.pairwise._pairwise_callable", "sklearn.metrics.pairwise._parallel_pairwise", - "sklearn.metrics.pairwise._return_float_dtype", "sklearn.metrics.pairwise._sparse_manhattan", - "sklearn.metrics.pairwise.additive_chi2_kernel", "sklearn.metrics.pairwise.check_paired_arrays", - "sklearn.metrics.pairwise.check_pairwise_arrays", "sklearn.metrics.pairwise.chi2_kernel", - "sklearn.metrics.pairwise.cosine_distances", "sklearn.metrics.pairwise.cosine_similarity", - "sklearn.metrics.pairwise.distance_metrics", "sklearn.metrics.pairwise.euclidean_distances", - "sklearn.metrics.pairwise.kernel_metrics", "sklearn.metrics.pairwise.laplacian_kernel", - "sklearn.metrics.pairwise.linear_kernel", "sklearn.metrics.pairwise.manhattan_distances", - "sklearn.metrics.pairwise.paired_cosine_distances", "sklearn.metrics.pairwise.paired_distances", - "sklearn.metrics.pairwise.paired_euclidean_distances", "sklearn.metrics.pairwise.paired_manhattan_distances", - "sklearn.metrics.pairwise.pairwise_distances", "sklearn.metrics.pairwise.pairwise_distances_argmin", - "sklearn.metrics.pairwise.pairwise_distances_argmin_min", "sklearn.metrics.pairwise.pairwise_kernels", - "sklearn.metrics.pairwise.polynomial_kernel", "sklearn.metrics.pairwise.rbf_kernel", - "sklearn.metrics.pairwise.sigmoid_kernel", "sklearn.metrics.pairwise_distances", - 
"sklearn.metrics.pairwise_distances_argmin", "sklearn.metrics.pairwise_distances_argmin_min", - "sklearn.metrics.pairwise_fast._chi2_kernel_fast", "sklearn.metrics.pairwise_fast._sparse_manhattan", - "sklearn.metrics.pairwise_kernels", "sklearn.metrics.precision_recall_curve", - "sklearn.metrics.precision_recall_fscore_support", "sklearn.metrics.precision_score", - "sklearn.metrics.r2_score", "sklearn.metrics.ranking._binary_clf_curve", - "sklearn.metrics.ranking.auc", "sklearn.metrics.ranking.average_precision_score", - "sklearn.metrics.ranking.coverage_error", "sklearn.metrics.ranking.label_ranking_average_precision_score", - "sklearn.metrics.ranking.label_ranking_loss", "sklearn.metrics.ranking.precision_recall_curve", - "sklearn.metrics.ranking.roc_auc_score", "sklearn.metrics.ranking.roc_curve", - "sklearn.metrics.recall_score", "sklearn.metrics.regression._check_reg_targets", - "sklearn.metrics.regression.explained_variance_score", "sklearn.metrics.regression.mean_absolute_error", - "sklearn.metrics.regression.mean_squared_error", "sklearn.metrics.regression.mean_squared_log_error", - "sklearn.metrics.regression.median_absolute_error", "sklearn.metrics.regression.r2_score", - "sklearn.metrics.regression.string_types", "sklearn.metrics.roc_auc_score", - "sklearn.metrics.roc_curve", "sklearn.metrics.scorer.SCORERS", - "sklearn.metrics.scorer._BaseScorer", "sklearn.metrics.scorer._PredictScorer", - "sklearn.metrics.scorer._ProbaScorer", "sklearn.metrics.scorer._ThresholdScorer", - "sklearn.metrics.scorer._check_multimetric_scoring", "sklearn.metrics.scorer._passthrough_scorer", - "sklearn.metrics.scorer.accuracy_scorer", "sklearn.metrics.scorer.adjusted_mutual_info_scorer", - "sklearn.metrics.scorer.adjusted_rand_scorer", "sklearn.metrics.scorer.average", - "sklearn.metrics.scorer.average_precision_scorer", "sklearn.metrics.scorer.check_scoring", - "sklearn.metrics.scorer.completeness_scorer", "sklearn.metrics.scorer.deprecation_msg", - "sklearn.metrics.scorer.explained_variance_scorer", "sklearn.metrics.scorer.f1_scorer", - "sklearn.metrics.scorer.fowlkes_mallows_scorer", "sklearn.metrics.scorer.get_scorer", - "sklearn.metrics.scorer.homogeneity_scorer", "sklearn.metrics.scorer.log_loss_scorer", - "sklearn.metrics.scorer.make_scorer", "sklearn.metrics.scorer.mean_absolute_error_scorer", - "sklearn.metrics.scorer.mean_squared_error_scorer", "sklearn.metrics.scorer.median_absolute_error_scorer", - "sklearn.metrics.scorer.mutual_info_scorer", "sklearn.metrics.scorer.name", - "sklearn.metrics.scorer.neg_log_loss_scorer", "sklearn.metrics.scorer.neg_mean_absolute_error_scorer", - "sklearn.metrics.scorer.neg_mean_squared_error_scorer", "sklearn.metrics.scorer.neg_mean_squared_log_error_scorer", - "sklearn.metrics.scorer.neg_median_absolute_error_scorer", "sklearn.metrics.scorer.normalized_mutual_info_scorer", - "sklearn.metrics.scorer.precision_scorer", "sklearn.metrics.scorer.qualified_name", - "sklearn.metrics.scorer.r2_scorer", "sklearn.metrics.scorer.recall_scorer", - "sklearn.metrics.scorer.roc_auc_scorer", "sklearn.metrics.scorer.v_measure_scorer", - "sklearn.metrics.silhouette_samples", "sklearn.metrics.silhouette_score", - "sklearn.metrics.v_measure_score", "sklearn.metrics.zero_one_loss", - "sklearn.model_selection.BaseCrossValidator", "sklearn.model_selection.GridSearchCV", - "sklearn.model_selection.GroupKFold", "sklearn.model_selection.GroupShuffleSplit", - "sklearn.model_selection.KFold", "sklearn.model_selection.LeaveOneGroupOut", - "sklearn.model_selection.LeaveOneOut", 
"sklearn.model_selection.LeavePGroupsOut", - "sklearn.model_selection.LeavePOut", "sklearn.model_selection.ParameterGrid", - "sklearn.model_selection.ParameterSampler", "sklearn.model_selection.PredefinedSplit", - "sklearn.model_selection.RandomizedSearchCV", "sklearn.model_selection.RepeatedKFold", - "sklearn.model_selection.RepeatedStratifiedKFold", "sklearn.model_selection.ShuffleSplit", - "sklearn.model_selection.StratifiedKFold", "sklearn.model_selection.StratifiedShuffleSplit", - "sklearn.model_selection.TimeSeriesSplit", "sklearn.model_selection._search.BaseSearchCV", - "sklearn.model_selection._search.GridSearchCV", "sklearn.model_selection._search.ParameterGrid", - "sklearn.model_selection._search.ParameterSampler", "sklearn.model_selection._search.RandomizedSearchCV", - "sklearn.model_selection._search._CVScoreTuple", "sklearn.model_selection._search._check_param_grid", - "sklearn.model_selection._search.fit_grid_point", "sklearn.model_selection._search.sp_version", - "sklearn.model_selection._split.BaseCrossValidator", "sklearn.model_selection._split.BaseShuffleSplit", - "sklearn.model_selection._split.GroupKFold", "sklearn.model_selection._split.GroupShuffleSplit", - "sklearn.model_selection._split.KFold", "sklearn.model_selection._split.LeaveOneGroupOut", - "sklearn.model_selection._split.LeaveOneOut", "sklearn.model_selection._split.LeavePGroupsOut", - "sklearn.model_selection._split.LeavePOut", "sklearn.model_selection._split.PredefinedSplit", - "sklearn.model_selection._split.RepeatedKFold", "sklearn.model_selection._split.RepeatedStratifiedKFold", - "sklearn.model_selection._split.ShuffleSplit", "sklearn.model_selection._split.StratifiedKFold", - "sklearn.model_selection._split.StratifiedShuffleSplit", "sklearn.model_selection._split.TimeSeriesSplit", - "sklearn.model_selection._split._BaseKFold", "sklearn.model_selection._split._CVIterableWrapper", - "sklearn.model_selection._split._RepeatedSplits", "sklearn.model_selection._split._approximate_mode", - "sklearn.model_selection._split._build_repr", "sklearn.model_selection._split._validate_shuffle_split", - "sklearn.model_selection._split._validate_shuffle_split_init", "sklearn.model_selection._split.check_cv", - "sklearn.model_selection._split.train_test_split", "sklearn.model_selection._validation._aggregate_score_dicts", - "sklearn.model_selection._validation._check_is_permutation", "sklearn.model_selection._validation._fit_and_predict", - "sklearn.model_selection._validation._fit_and_score", "sklearn.model_selection._validation._incremental_fit_estimator", - "sklearn.model_selection._validation._index_param_value", "sklearn.model_selection._validation._multimetric_score", - "sklearn.model_selection._validation._permutation_test_score", "sklearn.model_selection._validation._score", - "sklearn.model_selection._validation._shuffle", "sklearn.model_selection._validation._translate_train_sizes", - "sklearn.model_selection._validation.cross_val_predict", "sklearn.model_selection._validation.cross_val_score", - "sklearn.model_selection._validation.cross_validate", "sklearn.model_selection._validation.learning_curve", - "sklearn.model_selection._validation.permutation_test_score", "sklearn.model_selection._validation.validation_curve", - "sklearn.model_selection.check_cv", "sklearn.model_selection.cross_val_predict", - "sklearn.model_selection.cross_val_score", "sklearn.model_selection.cross_validate", - "sklearn.model_selection.fit_grid_point", "sklearn.model_selection.learning_curve", - 
"sklearn.model_selection.permutation_test_score", "sklearn.model_selection.train_test_split", - "sklearn.model_selection.validation_curve", "sklearn.multiclass.OneVsOneClassifier", - "sklearn.multiclass.OneVsRestClassifier", "sklearn.multiclass.OutputCodeClassifier", - "sklearn.multiclass._ConstantPredictor", "sklearn.multiclass._check_estimator", - "sklearn.multiclass._fit_binary", "sklearn.multiclass._fit_ovo_binary", - "sklearn.multiclass._partial_fit_binary", "sklearn.multiclass._partial_fit_ovo_binary", - "sklearn.multiclass._predict_binary", "sklearn.naive_bayes.BaseDiscreteNB", - "sklearn.naive_bayes.BaseNB", "sklearn.naive_bayes.BernoulliNB", - "sklearn.naive_bayes.GaussianNB", "sklearn.naive_bayes.MultinomialNB", - "sklearn.naive_bayes._ALPHA_MIN", "sklearn.neighbors.BallTree", - "sklearn.neighbors.DistanceMetric", "sklearn.neighbors.KDTree", - "sklearn.neighbors.KNeighborsClassifier", "sklearn.neighbors.KNeighborsRegressor", - "sklearn.neighbors.KernelDensity", "sklearn.neighbors.LSHForest", - "sklearn.neighbors.LocalOutlierFactor", "sklearn.neighbors.NearestCentroid", - "sklearn.neighbors.NearestNeighbors", "sklearn.neighbors.RadiusNeighborsClassifier", - "sklearn.neighbors.RadiusNeighborsRegressor", "sklearn.neighbors.approximate.GaussianRandomProjectionHash", - "sklearn.neighbors.approximate.HASH_DTYPE", "sklearn.neighbors.approximate.LSHForest", - "sklearn.neighbors.approximate.MAX_HASH_SIZE", "sklearn.neighbors.approximate.ProjectionToHashMixin", - "sklearn.neighbors.approximate._array_of_arrays", "sklearn.neighbors.approximate._find_longest_prefix_match", - "sklearn.neighbors.approximate._find_matching_indices", "sklearn.neighbors.ball_tree.BallTree", - "sklearn.neighbors.ball_tree.BinaryTree", "sklearn.neighbors.ball_tree.CLASS_DOC", - "sklearn.neighbors.ball_tree.DOC_DICT", "sklearn.neighbors.ball_tree.NeighborsHeap", - "sklearn.neighbors.ball_tree.NodeData", "sklearn.neighbors.ball_tree.NodeHeap", - "sklearn.neighbors.ball_tree.NodeHeapData", "sklearn.neighbors.ball_tree.VALID_METRICS", - "sklearn.neighbors.ball_tree.VALID_METRIC_IDS", "sklearn.neighbors.ball_tree.kernel_norm", - "sklearn.neighbors.ball_tree.load_heap", "sklearn.neighbors.ball_tree.newObj", - "sklearn.neighbors.ball_tree.nodeheap_sort", "sklearn.neighbors.ball_tree.offsets", - "sklearn.neighbors.ball_tree.simultaneous_sort", "sklearn.neighbors.base.KNeighborsMixin", - "sklearn.neighbors.base.NeighborsBase", "sklearn.neighbors.base.PAIRWISE_DISTANCE_FUNCTIONS", - "sklearn.neighbors.base.RadiusNeighborsMixin", "sklearn.neighbors.base.SupervisedFloatMixin", - "sklearn.neighbors.base.SupervisedIntegerMixin", "sklearn.neighbors.base.UnsupervisedMixin", - "sklearn.neighbors.base.VALID_METRICS", "sklearn.neighbors.base.VALID_METRICS_SPARSE", - "sklearn.neighbors.base._check_weights", "sklearn.neighbors.base._get_weights", - "sklearn.neighbors.classification.KNeighborsClassifier", "sklearn.neighbors.classification.RadiusNeighborsClassifier", - "sklearn.neighbors.dist_metrics.BrayCurtisDistance", "sklearn.neighbors.dist_metrics.CanberraDistance", - "sklearn.neighbors.dist_metrics.ChebyshevDistance", "sklearn.neighbors.dist_metrics.DiceDistance", - "sklearn.neighbors.dist_metrics.DistanceMetric", "sklearn.neighbors.dist_metrics.EuclideanDistance", - "sklearn.neighbors.dist_metrics.HammingDistance", "sklearn.neighbors.dist_metrics.HaversineDistance", - "sklearn.neighbors.dist_metrics.JaccardDistance", "sklearn.neighbors.dist_metrics.KulsinskiDistance", - "sklearn.neighbors.dist_metrics.METRIC_MAPPING", 
"sklearn.neighbors.dist_metrics.MahalanobisDistance", - "sklearn.neighbors.dist_metrics.ManhattanDistance", "sklearn.neighbors.dist_metrics.MatchingDistance", - "sklearn.neighbors.dist_metrics.MinkowskiDistance", "sklearn.neighbors.dist_metrics.PyFuncDistance", - "sklearn.neighbors.dist_metrics.RogersTanimotoDistance", "sklearn.neighbors.dist_metrics.RussellRaoDistance", - "sklearn.neighbors.dist_metrics.SEuclideanDistance", "sklearn.neighbors.dist_metrics.SokalMichenerDistance", - "sklearn.neighbors.dist_metrics.SokalSneathDistance", "sklearn.neighbors.dist_metrics.WMinkowskiDistance", - "sklearn.neighbors.dist_metrics.get_valid_metric_ids", "sklearn.neighbors.dist_metrics.newObj", - "sklearn.neighbors.graph._check_params", "sklearn.neighbors.graph._query_include_self", - "sklearn.neighbors.graph.kneighbors_graph", "sklearn.neighbors.graph.radius_neighbors_graph", - "sklearn.neighbors.kd_tree.BinaryTree", "sklearn.neighbors.kd_tree.CLASS_DOC", - "sklearn.neighbors.kd_tree.DOC_DICT", "sklearn.neighbors.kd_tree.KDTree", - "sklearn.neighbors.kd_tree.NeighborsHeap", "sklearn.neighbors.kd_tree.NodeData", - "sklearn.neighbors.kd_tree.NodeHeap", "sklearn.neighbors.kd_tree.NodeHeapData", - "sklearn.neighbors.kd_tree.VALID_METRICS", "sklearn.neighbors.kd_tree.VALID_METRIC_IDS", - "sklearn.neighbors.kd_tree.kernel_norm", "sklearn.neighbors.kd_tree.load_heap", - "sklearn.neighbors.kd_tree.newObj", "sklearn.neighbors.kd_tree.nodeheap_sort", - "sklearn.neighbors.kd_tree.offsets", "sklearn.neighbors.kd_tree.simultaneous_sort", - "sklearn.neighbors.kde.KernelDensity", "sklearn.neighbors.kde.TREE_DICT", - "sklearn.neighbors.kde.VALID_KERNELS", "sklearn.neighbors.kde.gammainc", - "sklearn.neighbors.kneighbors_graph", "sklearn.neighbors.lof.LocalOutlierFactor", - "sklearn.neighbors.nearest_centroid.NearestCentroid", "sklearn.neighbors.quad_tree.CELL_DTYPE", - "sklearn.neighbors.quad_tree._QuadTree", "sklearn.neighbors.radius_neighbors_graph", - "sklearn.neighbors.regression.KNeighborsRegressor", "sklearn.neighbors.regression.RadiusNeighborsRegressor", - "sklearn.neighbors.unsupervised.NearestNeighbors", "sklearn.pipeline.FeatureUnion", - "sklearn.pipeline.Pipeline", "sklearn.pipeline._fit_one_transformer", - "sklearn.pipeline._fit_transform_one", "sklearn.pipeline._name_estimators", - "sklearn.pipeline._transform_one", "sklearn.pipeline.make_pipeline", - "sklearn.pipeline.make_union", "sklearn.preprocessing.Binarizer", - "sklearn.preprocessing.FunctionTransformer", "sklearn.preprocessing.Imputer", - "sklearn.preprocessing.KernelCenterer", "sklearn.preprocessing.LabelBinarizer", - "sklearn.preprocessing.LabelEncoder", "sklearn.preprocessing.MaxAbsScaler", - "sklearn.preprocessing.MinMaxScaler", "sklearn.preprocessing.MultiLabelBinarizer", - "sklearn.preprocessing.Normalizer", "sklearn.preprocessing.OneHotEncoder", - "sklearn.preprocessing.PolynomialFeatures", "sklearn.preprocessing.QuantileTransformer", - "sklearn.preprocessing.RobustScaler", "sklearn.preprocessing.StandardScaler", - "sklearn.preprocessing._function_transformer.FunctionTransformer", "sklearn.preprocessing._function_transformer._identity", - "sklearn.preprocessing._function_transformer.string_types", "sklearn.preprocessing.add_dummy_feature", - "sklearn.preprocessing.binarize", "sklearn.preprocessing.data.BOUNDS_THRESHOLD", - "sklearn.preprocessing.data.Binarizer", "sklearn.preprocessing.data.FLOAT_DTYPES", - "sklearn.preprocessing.data.KernelCenterer", "sklearn.preprocessing.data.MaxAbsScaler", - "sklearn.preprocessing.data.MinMaxScaler", 
"sklearn.preprocessing.data.Normalizer", - "sklearn.preprocessing.data.OneHotEncoder", "sklearn.preprocessing.data.PolynomialFeatures", - "sklearn.preprocessing.data.QuantileTransformer", "sklearn.preprocessing.data.RobustScaler", - "sklearn.preprocessing.data.StandardScaler", "sklearn.preprocessing.data._handle_zeros_in_scale", - "sklearn.preprocessing.data._transform_selected", "sklearn.preprocessing.data.add_dummy_feature", - "sklearn.preprocessing.data.binarize", "sklearn.preprocessing.data.maxabs_scale", - "sklearn.preprocessing.data.minmax_scale", "sklearn.preprocessing.data.normalize", - "sklearn.preprocessing.data.quantile_transform", "sklearn.preprocessing.data.robust_scale", - "sklearn.preprocessing.data.scale", "sklearn.preprocessing.data.string_types", - "sklearn.preprocessing.imputation.FLOAT_DTYPES", "sklearn.preprocessing.imputation.Imputer", - "sklearn.preprocessing.imputation._get_mask", "sklearn.preprocessing.imputation._most_frequent", - "sklearn.preprocessing.label.LabelBinarizer", "sklearn.preprocessing.label.LabelEncoder", - "sklearn.preprocessing.label.MultiLabelBinarizer", "sklearn.preprocessing.label._inverse_binarize_multiclass", - "sklearn.preprocessing.label._inverse_binarize_thresholding", "sklearn.preprocessing.label.label_binarize", - "sklearn.preprocessing.label_binarize", "sklearn.preprocessing.maxabs_scale", - "sklearn.preprocessing.minmax_scale", "sklearn.preprocessing.normalize", - "sklearn.preprocessing.quantile_transform", "sklearn.preprocessing.robust_scale", - "sklearn.preprocessing.scale", "sklearn.random_projection.BaseRandomProjection", - "sklearn.random_projection.GaussianRandomProjection", "sklearn.random_projection.SparseRandomProjection", - "sklearn.random_projection._check_density", "sklearn.random_projection._check_input_size", - "sklearn.random_projection.gaussian_random_matrix", "sklearn.random_projection.johnson_lindenstrauss_min_dim", - "sklearn.random_projection.sparse_random_matrix", "sklearn.set_config", - "sklearn.setup_module", "sklearn.svm.LinearSVC", - "sklearn.svm.LinearSVR", "sklearn.svm.NuSVC", - "sklearn.svm.NuSVR", "sklearn.svm.OneClassSVM", - "sklearn.svm.SVC", "sklearn.svm.SVR", - "sklearn.svm.base.BaseLibSVM", "sklearn.svm.base.BaseSVC", - "sklearn.svm.base.LIBSVM_IMPL", "sklearn.svm.base._fit_liblinear", - "sklearn.svm.base._get_liblinear_solver_type", "sklearn.svm.base._one_vs_one_coef", - "sklearn.svm.bounds.l1_min_c", "sklearn.svm.classes.LinearSVC", - "sklearn.svm.classes.LinearSVR", "sklearn.svm.classes.NuSVC", - "sklearn.svm.classes.NuSVR", "sklearn.svm.classes.OneClassSVM", - "sklearn.svm.classes.SVC", "sklearn.svm.classes.SVR", - "sklearn.svm.l1_min_c", "sklearn.svm.liblinear.set_verbosity_wrap", - "sklearn.svm.liblinear.train_wrap", "sklearn.svm.libsvm.LIBSVM_KERNEL_TYPES", - "sklearn.svm.libsvm.cross_validation", "sklearn.svm.libsvm.decision_function", - "sklearn.svm.libsvm.fit", "sklearn.svm.libsvm.predict", - "sklearn.svm.libsvm.predict_proba", "sklearn.svm.libsvm.set_verbosity_wrap", - "sklearn.svm.libsvm_sparse.libsvm_sparse_decision_function", "sklearn.svm.libsvm_sparse.libsvm_sparse_predict", - "sklearn.svm.libsvm_sparse.libsvm_sparse_predict_proba", "sklearn.svm.libsvm_sparse.libsvm_sparse_train", - "sklearn.svm.libsvm_sparse.set_verbosity_wrap", "sklearn.tree.DecisionTreeClassifier", - "sklearn.tree.DecisionTreeRegressor", "sklearn.tree.ExtraTreeClassifier", - "sklearn.tree.ExtraTreeRegressor", "sklearn.tree._criterion.ClassificationCriterion", - "sklearn.tree._criterion.Criterion", 
"sklearn.tree._criterion.Entropy", - "sklearn.tree._criterion.FriedmanMSE", "sklearn.tree._criterion.Gini", - "sklearn.tree._criterion.MAE", "sklearn.tree._criterion.MSE", - "sklearn.tree._criterion.RegressionCriterion", "sklearn.tree._splitter.BaseDenseSplitter", - "sklearn.tree._splitter.BaseSparseSplitter", "sklearn.tree._splitter.BestSparseSplitter", - "sklearn.tree._splitter.BestSplitter", "sklearn.tree._splitter.RandomSparseSplitter", - "sklearn.tree._splitter.RandomSplitter", "sklearn.tree._splitter.Splitter", - "sklearn.tree._tree.BestFirstTreeBuilder", "sklearn.tree._tree.DepthFirstTreeBuilder", - "sklearn.tree._tree.NODE_DTYPE", "sklearn.tree._tree.TREE_LEAF", - "sklearn.tree._tree.TREE_UNDEFINED", "sklearn.tree._tree.Tree", - "sklearn.tree._tree.TreeBuilder", "sklearn.tree._utils.PriorityHeap", - "sklearn.tree._utils.Stack", "sklearn.tree._utils.WeightedMedianCalculator", - "sklearn.tree._utils.WeightedPQueue", "sklearn.tree._utils._realloc_test", - "sklearn.tree.export.SENTINEL", "sklearn.tree.export.Sentinel", - "sklearn.tree.export._color_brew", "sklearn.tree.export.export_graphviz", - "sklearn.tree.export_graphviz", "sklearn.tree.tree.BaseDecisionTree", - "sklearn.tree.tree.CRITERIA_CLF", "sklearn.tree.tree.CRITERIA_REG", - "sklearn.tree.tree.DENSE_SPLITTERS", "sklearn.tree.tree.DecisionTreeClassifier", - "sklearn.tree.tree.DecisionTreeRegressor", "sklearn.tree.tree.ExtraTreeClassifier", - "sklearn.tree.tree.ExtraTreeRegressor", "sklearn.tree.tree.SPARSE_SPLITTERS", - "sklearn.utils.Bunch", "sklearn.utils._get_n_jobs", - "sklearn.utils._logistic_sigmoid._log_logistic_sigmoid", "sklearn.utils._random._sample_without_replacement_check_input", - "sklearn.utils._random._sample_without_replacement_with_pool", "sklearn.utils._random._sample_without_replacement_with_reservoir_sampling", - "sklearn.utils._random._sample_without_replacement_with_tracking_selection", "sklearn.utils._random.sample_without_replacement", - "sklearn.utils.arrayfuncs.cholesky_delete", "sklearn.utils.arrayfuncs.min_pos", - "sklearn.utils.as_float_array", "sklearn.utils.assert_all_finite", - "sklearn.utils.axis0_safe_slice", "sklearn.utils.check_X_y", - "sklearn.utils.check_array", "sklearn.utils.check_consistent_length", - "sklearn.utils.check_random_state", "sklearn.utils.check_symmetric", - "sklearn.utils.class_weight.compute_class_weight", "sklearn.utils.class_weight.compute_sample_weight", - "sklearn.utils.column_or_1d", "sklearn.utils.compute_class_weight", - "sklearn.utils.compute_sample_weight", "sklearn.utils.deprecated", - "sklearn.utils.deprecation.DeprecationDict", "sklearn.utils.deprecation._is_deprecated", - "sklearn.utils.deprecation.deprecated", "sklearn.utils.extmath._deterministic_vector_sign_flip", - "sklearn.utils.extmath._impose_f_order", "sklearn.utils.extmath._incremental_mean_and_var", - "sklearn.utils.extmath.cartesian", "sklearn.utils.extmath.density", - "sklearn.utils.extmath.fast_dot", "sklearn.utils.extmath.fast_logdet", - "sklearn.utils.extmath.log_logistic", "sklearn.utils.extmath.logsumexp", - "sklearn.utils.extmath.make_nonnegative", "sklearn.utils.extmath.norm", - "sklearn.utils.extmath.np_version", "sklearn.utils.extmath.pinvh", - "sklearn.utils.extmath.randomized_range_finder", "sklearn.utils.extmath.randomized_svd", - "sklearn.utils.extmath.row_norms", "sklearn.utils.extmath.safe_min", - "sklearn.utils.extmath.safe_sparse_dot", "sklearn.utils.extmath.softmax", - "sklearn.utils.extmath.squared_norm", "sklearn.utils.extmath.stable_cumsum", - 
"sklearn.utils.extmath.svd_flip", "sklearn.utils.extmath.weighted_mode", - "sklearn.utils.fast_dict.IntFloatDict", "sklearn.utils.fast_dict.argmin", - "sklearn.utils.fixes._parse_version", "sklearn.utils.fixes.divide", - "sklearn.utils.fixes.euler_gamma", "sklearn.utils.fixes.makedirs", - "sklearn.utils.fixes.np_version", "sklearn.utils.fixes.parallel_helper", - "sklearn.utils.fixes.sp_version", "sklearn.utils.fixes.sparse_min_max", - "sklearn.utils.gen_batches", "sklearn.utils.gen_even_slices", - "sklearn.utils.graph.connected_components", "sklearn.utils.graph.graph_laplacian", - "sklearn.utils.graph.graph_shortest_path", "sklearn.utils.graph.single_source_shortest_path_length", - "sklearn.utils.graph_shortest_path.graph_shortest_path", "sklearn.utils.indexable", - "sklearn.utils.indices_to_mask", "sklearn.utils.linear_assignment_._HungarianState", - "sklearn.utils.linear_assignment_._hungarian", "sklearn.utils.linear_assignment_._step1", - "sklearn.utils.linear_assignment_._step3", "sklearn.utils.linear_assignment_._step4", - "sklearn.utils.linear_assignment_._step5", "sklearn.utils.linear_assignment_._step6", - "sklearn.utils.linear_assignment_.linear_assignment", "sklearn.utils.metaestimators._BaseComposition", - "sklearn.utils.metaestimators._IffHasAttrDescriptor", "sklearn.utils.metaestimators._safe_split", - "sklearn.utils.metaestimators.if_delegate_has_method", "sklearn.utils.multiclass._FN_UNIQUE_LABELS", - "sklearn.utils.multiclass._check_partial_fit_first_call", "sklearn.utils.multiclass._is_integral_float", - "sklearn.utils.multiclass._ovr_decision_function", "sklearn.utils.multiclass._unique_indicator", - "sklearn.utils.multiclass._unique_multiclass", "sklearn.utils.multiclass.check_classification_targets", - "sklearn.utils.multiclass.class_distribution", "sklearn.utils.multiclass.is_multilabel", - "sklearn.utils.multiclass.string_types", "sklearn.utils.multiclass.type_of_target", - "sklearn.utils.multiclass.unique_labels", "sklearn.utils.murmurhash.murmurhash3_32", - "sklearn.utils.murmurhash.murmurhash3_bytes_array_s32", "sklearn.utils.murmurhash.murmurhash3_bytes_array_u32", - "sklearn.utils.murmurhash.murmurhash3_bytes_s32", "sklearn.utils.murmurhash.murmurhash3_bytes_u32", - "sklearn.utils.murmurhash.murmurhash3_int_s32", "sklearn.utils.murmurhash.murmurhash3_int_u32", - "sklearn.utils.murmurhash3_32", "sklearn.utils.optimize._LineSearchError", - "sklearn.utils.optimize._cg", "sklearn.utils.optimize._line_search_wolfe12", - "sklearn.utils.optimize.newton_cg", "sklearn.utils.random.choice", - "sklearn.utils.random.random_choice_csc", "sklearn.utils.resample", - "sklearn.utils.safe_indexing", "sklearn.utils.safe_mask", - "sklearn.utils.safe_sqr", "sklearn.utils.seq_dataset.ArrayDataset", - "sklearn.utils.seq_dataset.CSRDataset", "sklearn.utils.seq_dataset.SequentialDataset", - "sklearn.utils.shuffle", "sklearn.utils.sparsefuncs._csc_mean_var_axis0", - "sklearn.utils.sparsefuncs._csr_mean_var_axis0", "sklearn.utils.sparsefuncs._get_elem_at_rank", - "sklearn.utils.sparsefuncs._get_median", "sklearn.utils.sparsefuncs._incr_mean_var_axis0", - "sklearn.utils.sparsefuncs._raise_error_wrong_axis", "sklearn.utils.sparsefuncs._raise_typeerror", - "sklearn.utils.sparsefuncs.count_nonzero", "sklearn.utils.sparsefuncs.csc_median_axis_0", - "sklearn.utils.sparsefuncs.incr_mean_variance_axis", "sklearn.utils.sparsefuncs.inplace_column_scale", - "sklearn.utils.sparsefuncs.inplace_csr_column_scale", "sklearn.utils.sparsefuncs.inplace_csr_row_scale", - 
"sklearn.utils.sparsefuncs.inplace_row_scale", "sklearn.utils.sparsefuncs.inplace_swap_column", - "sklearn.utils.sparsefuncs.inplace_swap_row", "sklearn.utils.sparsefuncs.inplace_swap_row_csc", - "sklearn.utils.sparsefuncs.inplace_swap_row_csr", "sklearn.utils.sparsefuncs.mean_variance_axis", - "sklearn.utils.sparsefuncs.min_max_axis", "sklearn.utils.sparsefuncs_fast._csc_mean_variance_axis0", - "sklearn.utils.sparsefuncs_fast._csr_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast._csr_row_norms", - "sklearn.utils.sparsefuncs_fast._incr_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast._inplace_csr_row_normalize_l1", - "sklearn.utils.sparsefuncs_fast._inplace_csr_row_normalize_l2", "sklearn.utils.sparsefuncs_fast.assign_rows_csr", - "sklearn.utils.sparsefuncs_fast.csc_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast.csr_mean_variance_axis0", - "sklearn.utils.sparsefuncs_fast.csr_row_norms", "sklearn.utils.sparsefuncs_fast.incr_mean_variance_axis0", - "sklearn.utils.sparsefuncs_fast.inplace_csr_row_normalize_l1", "sklearn.utils.sparsefuncs_fast.inplace_csr_row_normalize_l2", - "sklearn.utils.stats._weighted_percentile", "sklearn.utils.stats.rankdata", - "sklearn.utils.tosequence", "sklearn.utils.validation.FLOAT_DTYPES", - "sklearn.utils.validation._assert_all_finite", "sklearn.utils.validation._ensure_sparse_format", - "sklearn.utils.validation._is_arraylike", "sklearn.utils.validation._num_samples", - "sklearn.utils.validation._shape_repr", "sklearn.utils.validation.as_float_array", - "sklearn.utils.validation.assert_all_finite", "sklearn.utils.validation.check_X_y", - "sklearn.utils.validation.check_array", "sklearn.utils.validation.check_consistent_length", - "sklearn.utils.validation.check_is_fitted", "sklearn.utils.validation.check_memory", - "sklearn.utils.validation.check_non_negative", "sklearn.utils.validation.check_random_state", - "sklearn.utils.validation.check_symmetric", "sklearn.utils.validation.column_or_1d", - "sklearn.utils.validation.has_fit_parameter", "sklearn.utils.validation.indexable", - "sklearn.utils.weight_vector.WeightVector" -], - - "SKR_NAMES": [ - "skrebate.MultiSURF", "skrebate.MultiSURFstar", - "skrebate.ReliefF", "skrebate.SURF", - "skrebate.SURFstar", "skrebate.TuRF", - "skrebate.multisurf.MultiSURF", "skrebate.multisurfstar.MultiSURFstar", - "skrebate.relieff.ReliefF", "skrebate.scoring_utils.MultiSURF_compute_scores", - "skrebate.scoring_utils.MultiSURFstar_compute_scores", "skrebate.scoring_utils.ReliefF_compute_scores", - "skrebate.scoring_utils.SURF_compute_scores", "skrebate.scoring_utils.SURFstar_compute_scores", - "skrebate.scoring_utils.compute_score", "skrebate.scoring_utils.get_row_missing", - "skrebate.scoring_utils.ramp_function", "skrebate.surf.SURF", - "skrebate.surfstar.SURFstar", "skrebate.turf.TuRF" - ], - - "XGB_NAMES": [ - "xgboost.Booster", "xgboost.DMatrix", - "xgboost.VERSION_FILE", "xgboost.XGBClassifier", - "xgboost.XGBModel", "xgboost.XGBRegressor", - "xgboost.callback._fmt_metric", "xgboost.callback._get_callback_context", - "xgboost.callback.early_stop", "xgboost.callback.print_evaluation", - "xgboost.callback.record_evaluation", "xgboost.callback.reset_learning_rate", - "xgboost.compat.PANDAS_INSTALLED", "xgboost.compat.PY3", - "xgboost.compat.SKLEARN_INSTALLED", "xgboost.compat.STRING_TYPES", - "xgboost.compat.py_str", "xgboost.core.Booster", - "xgboost.core.CallbackEnv", "xgboost.core.DMatrix", - "xgboost.core.EarlyStopException", "xgboost.core.PANDAS_DTYPE_MAPPER", - "xgboost.core.PANDAS_INSTALLED", 
"xgboost.core.PY3", - "xgboost.core.STRING_TYPES", "xgboost.core.XGBoostError", - "xgboost.core._check_call", "xgboost.core._load_lib", - "xgboost.core._maybe_pandas_data", "xgboost.core._maybe_pandas_label", - "xgboost.core.c_array", "xgboost.core.c_str", - "xgboost.core.ctypes2buffer", "xgboost.core.ctypes2numpy", - "xgboost.core.from_cstr_to_pystr", "xgboost.core.from_pystr_to_cstr", - "xgboost.cv", "xgboost.f", - "xgboost.libpath.XGBoostLibraryNotFound", "xgboost.libpath.find_lib_path", - "xgboost.plot_importance", "xgboost.plot_tree", - "xgboost.plotting._EDGEPAT", "xgboost.plotting._EDGEPAT2", - "xgboost.plotting._LEAFPAT", "xgboost.plotting._NODEPAT", - "xgboost.plotting._parse_edge", "xgboost.plotting._parse_node", - "xgboost.plotting.plot_importance", "xgboost.plotting.plot_tree", - "xgboost.plotting.to_graphviz", "xgboost.rabit.DTYPE_ENUM__", - "xgboost.rabit.STRING_TYPES", "xgboost.rabit._init_rabit", - "xgboost.rabit.allreduce", "xgboost.rabit.broadcast", - "xgboost.rabit.finalize", "xgboost.rabit.get_processor_name", - "xgboost.rabit.get_rank", "xgboost.rabit.get_world_size", - "xgboost.rabit.init", "xgboost.rabit.tracker_print", - "xgboost.rabit.version_number", "xgboost.sklearn.SKLEARN_INSTALLED", - "xgboost.sklearn.XGBClassifier", "xgboost.sklearn.XGBModel", - "xgboost.sklearn.XGBRegressor", "xgboost.sklearn._objective_decorator", - "xgboost.to_graphviz", "xgboost.train", - "xgboost.training.CVPack", "xgboost.training.SKLEARN_INSTALLED", - "xgboost.training.STRING_TYPES", "xgboost.training._train_internal", - "xgboost.training.aggcv", "xgboost.training.cv", - "xgboost.training.mknfold", "xgboost.training.train" - ], - - - "NUMPY_NAMES": [ - "numpy.core.multiarray._reconstruct", "numpy.ndarray", - "numpy.dtype", "numpy.core.multiarray.scalar", "numpy.random.__RandomState_ctor", - "numpy.ma.core._mareconstruct", "numpy.ma.core.MaskedArray" - ], - - "IMBLEARN_NAMES":[ - "imblearn.pipeline.Pipeline", "imblearn.over_sampling._random_over_sampler.RandomOverSampler", - "imblearn.under_sampling._prototype_selection._edited_nearest_neighbours.EditedNearestNeighbours" - ], - - "MLXTEND_NAMES":[ - "mlxtend.classifier.stacking_cv_classification.StackingCVClassifier", - "mlxtend.classifier.stacking_classification.StackingClassifier", - "mlxtend.regressor.stacking_cv_regression.StackingCVRegressor", - "mlxtend.regressor.stacking_regression.StackingRegressor" - ] -} \ No newline at end of file diff -r 21d3e08b1a48 -r 82b6104d4682 preprocessors.py --- a/preprocessors.py Tue Jul 09 19:26:54 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,184 +0,0 @@ -""" -Z_RandomOverSampler -""" - -import imblearn -import numpy as np - -from collections import Counter -from imblearn.over_sampling.base import BaseOverSampler -from imblearn.over_sampling import RandomOverSampler -from imblearn.pipeline import Pipeline as imbPipeline -from imblearn.utils import check_target_type -from scipy import sparse -from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.preprocessing.data import _handle_zeros_in_scale -from sklearn.utils import check_array, safe_indexing -from sklearn.utils.fixes import nanpercentile -from sklearn.utils.validation import (check_is_fitted, check_X_y, - FLOAT_DTYPES) - - -class Z_RandomOverSampler(BaseOverSampler): - - def __init__(self, sampling_strategy='auto', - return_indices=False, - random_state=None, - ratio=None, - negative_thres=0, - positive_thres=-1): - super(Z_RandomOverSampler, self).__init__( - sampling_strategy=sampling_strategy, ratio=ratio) - 
self.random_state = random_state - self.return_indices = return_indices - self.negative_thres = negative_thres - self.positive_thres = positive_thres - - @staticmethod - def _check_X_y(X, y): - y, binarize_y = check_target_type(y, indicate_one_vs_all=True) - X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'], dtype=None) - return X, y, binarize_y - - def _fit_resample(self, X, y): - n_samples = X.shape[0] - - # convert y to z_score - y_z = (y - y.mean()) / y.std() - - index0 = np.arange(n_samples) - index_negative = index0[y_z > self.negative_thres] - index_positive = index0[y_z <= self.positive_thres] - index_unclassified = [x for x in index0 - if x not in index_negative - and x not in index_positive] - - y_z[index_negative] = 0 - y_z[index_positive] = 1 - y_z[index_unclassified] = -1 - - ros = RandomOverSampler( - sampling_strategy=self.sampling_strategy, - random_state=self.random_state, - ratio=self.ratio) - _, _ = ros.fit_resample(X, y_z) - sample_indices = ros.sample_indices_ - - print("Before sampler: %s. Total after: %s" - % (Counter(y_z), sample_indices.shape)) - - self.sample_indices_ = np.array(sample_indices) - - if self.return_indices: - return (safe_indexing(X, sample_indices), - safe_indexing(y, sample_indices), - sample_indices) - return (safe_indexing(X, sample_indices), - safe_indexing(y, sample_indices)) - - -def _get_quantiles(X, quantile_range): - """ - Calculate column percentiles for 2d array - - Parameters - ---------- - X : array-like, shape [n_samples, n_features] - """ - quantiles = [] - for feature_idx in range(X.shape[1]): - if sparse.issparse(X): - column_nnz_data = X.data[ - X.indptr[feature_idx]: X.indptr[feature_idx + 1]] - column_data = np.zeros(shape=X.shape[0], dtype=X.dtype) - column_data[:len(column_nnz_data)] = column_nnz_data - else: - column_data = X[:, feature_idx] - quantiles.append(nanpercentile(column_data, quantile_range)) - - quantiles = np.transpose(quantiles) - - return quantiles - - -class TDMScaler(BaseEstimator, TransformerMixin): - """ - Scale features using Training Distribution Matching (TDM) algorithm - - References - ---------- - .. [1] Thompson JA, Tan J and Greene CS (2016) Cross-platform - normalization of microarray and RNA-seq data for machine - learning applications. PeerJ 4, e1621. - """ - - def __init__(self, q_lower=25.0, q_upper=75.0, ): - self.q_lower = q_lower - self.q_upper = q_upper - - def fit(self, X, y=None): - """ - Parameters - ---------- - X : array-like, shape [n_samples, n_features] - """ - X = check_array(X, copy=True, estimator=self, dtype=FLOAT_DTYPES, - force_all_finite=True) - - if not 0 <= self.q_lower <= self.q_upper <= 100: - raise ValueError("Invalid quantile parameter values: " - "q_lower %s, q_upper: %s" - % (str(self.q_lower), str(self.q_upper))) - - # TODO sparse data - quantiles = nanpercentile(X, (self.q_lower, self.q_upper)) - iqr = quantiles[1] - quantiles[0] - - self.q_lower_ = quantiles[0] - self.q_upper_ = quantiles[1] - self.iqr_ = _handle_zeros_in_scale(iqr, copy=False) - - self.max_ = np.nanmax(X) - self.min_ = np.nanmin(X) - - return self - - def transform(self, X): - """ - Parameters - ---------- - X : {array-like, sparse matrix} - The data used to scale along the specified axis. 
-        """
-        check_is_fitted(self, 'iqr_', 'max_')
-        X = check_array(X, copy=True, estimator=self, dtype=FLOAT_DTYPES,
-                        force_all_finite=True)
-
-        # TODO sparse data
-        train_upper_scale = (self.max_ - self.q_upper_) / self.iqr_
-        train_lower_scale = (self.q_lower_ - self.min_) / self.iqr_
-
-        test_quantiles = nanpercentile(X, (self.q_lower, self.q_upper))
-        test_iqr = _handle_zeros_in_scale(
-            test_quantiles[1] - test_quantiles[0], copy=False)
-
-        test_upper_bound = test_quantiles[1] + train_upper_scale * test_iqr
-        test_lower_bound = test_quantiles[0] - train_lower_scale * test_iqr
-
-        test_min = np.nanmin(X)
-        if test_lower_bound < test_min:
-            test_lower_bound = test_min
-
-        X[X > test_upper_bound] = test_upper_bound
-        X[X < test_lower_bound] = test_lower_bound
-
-        X = (X - test_lower_bound) / (test_upper_bound - test_lower_bound)\
-            * (self.max_ - self.min_) + self.min_
-
-        return X
-
-    def inverse_transform(self, X):
-        """
-        Scale the data back to the original state
-        """
-        raise NotImplementedError("Inverse transformation is not implemented!")
diff -r 21d3e08b1a48 -r 82b6104d4682 search_model_validation.py
--- a/search_model_validation.py	Tue Jul 09 19:26:54 2019 -0400
+++ b/search_model_validation.py	Fri Aug 09 07:12:16 2019 -0400
@@ -1,22 +1,20 @@
 import argparse
 import collections
 import imblearn
+import joblib
 import json
 import numpy as np
-import pandas
+import pandas as pd
 import pickle
 import skrebate
 import sklearn
 import sys
 import xgboost
 import warnings
-import iraps_classifier
-import model_validations
-import preprocessors
-import feature_selectors
 from imblearn import under_sampling, over_sampling, combine
 from scipy.io import mmread
 from mlxtend import classifier, regressor
+from sklearn.base import clone
 from sklearn import (cluster, compose, decomposition, ensemble,
                      feature_extraction, feature_selection,
                      gaussian_process, kernel_approximation, metrics,
@@ -24,18 +22,23 @@
                      pipeline, preprocessing, svm, linear_model,
                      tree, discriminant_analysis)
 from sklearn.exceptions import FitFailedWarning
-from sklearn.externals import joblib
-from sklearn.model_selection._validation import _score
+from sklearn.model_selection._validation import _score, cross_validate
+from sklearn.model_selection import _search, _validation
-from utils import (SafeEval, get_cv, get_scoring, get_X_y,
-                   load_model, read_columns)
-from model_validations import train_test_split
+from galaxy_ml.utils import (SafeEval, get_cv, get_scoring, load_model,
+                             read_columns, try_get_attr, get_module)
+
+_fit_and_score = try_get_attr('galaxy_ml.model_validations', '_fit_and_score')
+setattr(_search, '_fit_and_score', _fit_and_score)
+setattr(_validation, '_fit_and_score', _fit_and_score)

 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
 CACHE_DIR = './cached'
-NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', 'steps',
-                  'nthread', 'verbose')
+NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', '_path',
+                  'nthread', 'callbacks')
+ALLOWED_CALLBACKS = ('EarlyStopping', 'TerminateOnNaN', 'ReduceLROnPlateau',
+                     'CSVLogger', 'None')
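As an aside, NON_SEARCHABLE is a suffix blacklist: parameter names ending in any of these strings are kept out of the hyperparameter search space. One plausible reading of how such a tuple is applied, consistent with the endswith-style checks used later in this script (illustrative sketch only, not code from the patch)::

    from sklearn.ensemble import RandomForestClassifier

    NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', '_path',
                      'nthread', 'callbacks')

    est = RandomForestClassifier()
    # str.endswith accepts a tuple, so one call screens every blacklisted suffix
    searchable = {p: v for p, v in est.get_params().items()
                  if not p.endswith(NON_SEARCHABLE)}
    print('n_jobs' in searchable)  # False: never exposed to the search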
 def _eval_search_params(params_builder):
@@ -62,9 +65,9 @@
             search_list = search_list[1:].strip()
             # TODO maybe add regular expression check
             ev = safe_eval_es(search_list)
-            preprocessors = (
+            preprocessings = (
                 preprocessing.StandardScaler(), preprocessing.Binarizer(),
-                preprocessing.Imputer(), preprocessing.MaxAbsScaler(),
+                preprocessing.MaxAbsScaler(),
                 preprocessing.Normalizer(), preprocessing.MinMaxScaler(),
                 preprocessing.PolynomialFeatures(),
                 preprocessing.RobustScaler(), feature_selection.SelectKBest(),
@@ -133,21 +136,21 @@
                 if obj is None:
                     newlist.append(None)
                 elif obj == 'all_0':
-                    newlist.extend(preprocessors[0:36])
+                    newlist.extend(preprocessings[0:35])
                 elif obj == 'sk_prep_all':  # no KernelCenterer()
-                    newlist.extend(preprocessors[0:8])
+                    newlist.extend(preprocessings[0:7])
                 elif obj == 'fs_all':
-                    newlist.extend(preprocessors[8:15])
+                    newlist.extend(preprocessings[7:14])
                 elif obj == 'decomp_all':
-                    newlist.extend(preprocessors[15:26])
+                    newlist.extend(preprocessings[14:25])
                 elif obj == 'k_appr_all':
-                    newlist.extend(preprocessors[26:30])
+                    newlist.extend(preprocessings[25:29])
                 elif obj == 'reb_all':
-                    newlist.extend(preprocessors[31:36])
+                    newlist.extend(preprocessings[30:35])
                 elif obj == 'imb_all':
-                    newlist.extend(preprocessors[36:55])
-                elif type(obj) is int and -1 < obj < len(preprocessors):
-                    newlist.append(preprocessors[obj])
+                    newlist.extend(preprocessings[35:54])
+                elif type(obj) is int and -1 < obj < len(preprocessings):
+                    newlist.append(preprocessings[obj])
                 elif hasattr(obj, 'get_params'):  # user uploaded object
                     if 'n_jobs' in obj.get_params():
                         newlist.append(obj.set_params(n_jobs=N_JOBS))
@@ -162,7 +165,10 @@

 def main(inputs, infile_estimator, infile1, infile2,
-         outfile_result, outfile_object=None, groups=None):
+         outfile_result, outfile_object=None,
+         outfile_weights=None, groups=None,
+         ref_seq=None, intervals=None, targets=None,
+         fasta_path=None):
     """
     Parameters
     ----------
@@ -184,21 +190,40 @@
     outfile_object : str, optional
         File path to save searchCV object

+    outfile_weights : str, optional
+        File path to save model weights
+
     groups : str
         File path to dataset containing groups labels
+
+    ref_seq : str
+        File path to dataset containing genome sequence file
+
+    intervals : str
+        File path to dataset containing interval file
+
+    targets : str
+        File path to dataset compressed target bed file
+
+    fasta_path : str
+        File path to dataset containing fasta file
     """
-    warnings.simplefilter('ignore')

     with open(inputs, 'r') as param_handler:
         params = json.load(param_handler)
-    if groups:
-        (params['search_schemes']['options']['cv_selector']
-         ['groups_selector']['infile_g']) = groups

     params_builder = params['search_schemes']['search_params_builder']

+    with open(infile_estimator, 'rb') as estimator_handler:
+        estimator = load_model(estimator_handler)
+    estimator_params = estimator.get_params()
+
+    # store read dataframe object
+    loaded_df = {}
+
     input_type = params['input_options']['selected_input']
+    # tabular input
     if input_type == 'tabular':
         header = 'infer' if params['input_options']['header1'] else None
         column_option = (params['input_options']['column_selector_options_1']
                          ['selected_column_selector_option'])
@@ -208,16 +233,48 @@
             c = params['input_options']['column_selector_options_1']['col1']
         else:
             c = None
-        X = read_columns(
-                infile1,
-                c=c,
-                c_option=column_option,
-                sep='\t',
-                header=header,
-                parse_dates=True).astype(float)
-    else:
+
+        df_key = infile1 + repr(header)
+        df = pd.read_csv(infile1, sep='\t', header=header,
+                         parse_dates=True)
+        loaded_df[df_key] = df
+
+        X = read_columns(df, c=c, c_option=column_option).astype(float)
+    # sparse input
+    elif input_type == 'sparse':
         X = mmread(open(infile1, 'r'))
+    # fasta_file input
+    elif input_type == 'seq_fasta':
+        pyfaidx = get_module('pyfaidx')
+        sequences = pyfaidx.Fasta(fasta_path)
+        n_seqs = len(sequences.keys())
+        X = np.arange(n_seqs)[:, np.newaxis]
+        for param in estimator_params.keys():
+            if param.endswith('fasta_path'):
+                estimator.set_params(
+                    **{param: fasta_path})
+                break
+        else:
+            raise ValueError(
+                "The selected estimator doesn't support "
+                "fasta file input! Please consider using "
+                "KerasGBatchClassifier with "
+                "FastaDNABatchGenerator/FastaProteinBatchGenerator "
+                "or having GenomeOneHotEncoder/ProteinOneHotEncoder "
+                "in pipeline!")
+
+    elif input_type == 'refseq_and_interval':
+        path_params = {
+            'data_batch_generator__ref_genome_path': ref_seq,
+            'data_batch_generator__intervals_path': intervals,
+            'data_batch_generator__target_path': targets
+        }
+        estimator.set_params(**path_params)
+        n_intervals = sum(1 for line in open(intervals))
+        X = np.arange(n_intervals)[:, np.newaxis]
+
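For readers without galaxy_ml at hand, the tabular branch above boils down to reading the TSV once, caching the frame in loaded_df, and slicing out feature columns. A rough pandas-only sketch (hypothetical file name and column indices; read_columns itself is a galaxy_ml helper)::

    import pandas as pd

    # read the feature table once; header handling mirrors the 'header1' option
    df = pd.read_csv('features.tsv', sep='\t', header='infer',
                     parse_dates=True)

    cols = [0, 1, 2]  # hypothetical 0-based column selection
    X = df.iloc[:, cols].values.astype(float)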
+    # Get target y
     header = 'infer' if params['input_options']['header2'] else None
     column_option = (params['input_options']['column_selector_options_2']
                      ['selected_column_selector_option2'])
@@ -226,6 +283,15 @@
         c = params['input_options']['column_selector_options_2']['col2']
     else:
         c = None
+
+    df_key = infile2 + repr(header)
+    if df_key in loaded_df:
+        infile2 = loaded_df[df_key]
+    else:
+        infile2 = pd.read_csv(infile2, sep='\t',
+                              header=header, parse_dates=True)
+        loaded_df[df_key] = infile2
+
     y = read_columns(
             infile2,
             c=c,
@@ -233,13 +299,47 @@
             sep='\t',
             header=header,
             parse_dates=True)
-    y = y.ravel()
+    if len(y.shape) == 2 and y.shape[1] == 1:
+        y = y.ravel()
+    if input_type == 'refseq_and_interval':
+        estimator.set_params(
+            data_batch_generator__features=y.ravel().tolist())
+        y = None
+    # end y

     optimizer = params['search_schemes']['selected_search_scheme']
     optimizer = getattr(model_selection, optimizer)

+    # handle gridsearchcv options
     options = params['search_schemes']['options']

+    if groups:
+        header = 'infer' if (options['cv_selector']['groups_selector']
+                             ['header_g']) else None
+        column_option = (options['cv_selector']['groups_selector']
+                         ['column_selector_options_g']
+                         ['selected_column_selector_option_g'])
+        if column_option in ['by_index_number', 'all_but_by_index_number',
+                             'by_header_name', 'all_but_by_header_name']:
+            c = (options['cv_selector']['groups_selector']
+                 ['column_selector_options_g']['col_g'])
+        else:
+            c = None
+
+        df_key = groups + repr(header)
+        if df_key in loaded_df:
+            groups = loaded_df[df_key]
+
+        groups = read_columns(
+                groups,
+                c=c,
+                c_option=column_option,
+                sep='\t',
+                header=header,
+                parse_dates=True)
+        groups = groups.ravel()
+        options['cv_selector']['groups_selector'] = groups
+
     splitter, groups = get_cv(options.pop('cv_selector'))
     options['cv'] = splitter
     options['n_jobs'] = N_JOBS
@@ -254,100 +354,199 @@
     if 'pre_dispatch' in options and options['pre_dispatch'] == '':
         options['pre_dispatch'] = None

-    with open(infile_estimator, 'rb') as estimator_handler:
-        estimator = load_model(estimator_handler)
+    # del loaded_df
+    del loaded_df
+
+    # handle memory
     memory = joblib.Memory(location=CACHE_DIR, verbose=0)
     # caching iraps_core fits can increase search speed significantly
     if estimator.__class__.__name__ == 'IRAPSClassifier':
         estimator.set_params(memory=memory)
     else:
-        for p, v in estimator.get_params().items():
+        # For iraps buried in pipeline
+        for p, v in estimator_params.items():
             if p.endswith('memory'):
+                # for case of `__irapsclassifier__memory`
                 if len(p) > 8 and p[:-8].endswith('irapsclassifier'):
                     # caching iraps_core fits can increase search
                     # speed significantly
                     new_params = {p: memory}
                     estimator.set_params(**new_params)
+                # for security reasons, we don't want memory being
+                # modified unexpectedly
                 elif v:
                     new_params = {p: None}
                     estimator.set_params(**new_params)
+            # For now, 1 CPU is suggested for irapsclassifier
             elif p.endswith('n_jobs'):
                 new_params = {p: 1}
                 estimator.set_params(**new_params)
+            # for security reasons, types of callbacks are limited
+            elif p.endswith('callbacks'):
+                for cb in v:
+                    cb_type = cb['callback_selection']['callback_type']
+                    if cb_type not in ALLOWED_CALLBACKS:
+                        raise ValueError(
+                            "Prohibited callback type: %s!" % cb_type)
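The parameter loop above leans on sklearn's double-underscore convention: any nested step's parameter is addressable as <step>__<param>, which is why a joblib.Memory can be injected into a pipeline-buried step by key suffix alone. A compact sketch with hypothetical step names::

    from joblib import Memory
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    pipe = Pipeline([('scale', StandardScaler()),
                     ('clf', LogisticRegression())])

    # nested parameters show up as '<step>__<param>' keys, e.g. 'clf__C';
    # suffix tests like p.endswith('memory') scan exactly these keys
    print([p for p in pipe.get_params() if p.endswith('C')])

    # a Pipeline also takes a joblib Memory directly, caching its transformers
    pipe.set_params(memory=Memory(location='./cached', verbose=0))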
    param_grid = _eval_search_params(params_builder)

    searcher = optimizer(estimator, param_grid, **options)

-    # do train_test_split
-    do_train_test_split = params['train_test_split'].pop('do_split')
-    if do_train_test_split == 'yes':
-        # make sure refit is choosen
-        if not options['refit']:
-            raise ValueError("Refit must be `True` for shuffle splitting!")
-        split_options = params['train_test_split']
+    # do nested split
+    split_mode = params['outer_split'].pop('split_mode')
+    # nested CV, outer cv using cross_validate
+    if split_mode == 'nested_cv':
+        outer_cv, _ = get_cv(params['outer_split']['cv_selector'])

-        # splits
-        if split_options['shuffle'] == 'stratified':
-            split_options['labels'] = y
-            X, X_test, y, y_test = train_test_split(X, y, **split_options)
-        elif split_options['shuffle'] == 'group':
-            if not groups:
-                raise ValueError("No group based CV option was "
-                                 "choosen for group shuffle!")
-            split_options['labels'] = groups
-            X, X_test, y, y_test, groups, _ =\
-                train_test_split(X, y, **split_options)
+        if options['error_score'] == 'raise':
+            rval = cross_validate(
+                searcher, X, y, scoring=options['scoring'],
+                cv=outer_cv, n_jobs=N_JOBS, verbose=0,
+                error_score=options['error_score'])
         else:
-            if split_options['shuffle'] == 'None':
-                split_options['shuffle'] = None
-            X, X_test, y, y_test =\
-                train_test_split(X, y, **split_options)
-        # end train_test_split
+            warnings.simplefilter('always', FitFailedWarning)
+            with warnings.catch_warnings(record=True) as w:
+                try:
+                    rval = cross_validate(
+                        searcher, X, y,
+                        scoring=options['scoring'],
+                        cv=outer_cv, n_jobs=N_JOBS,
+                        verbose=0,
+                        error_score=options['error_score'])
+                except ValueError:
+                    pass
+                for warning in w:
+                    print(repr(warning.message))

-    if options['error_score'] == 'raise':
-        searcher.fit(X, y, groups=groups)
+        keys = list(rval.keys())
+        for k in keys:
+            if k.startswith('test'):
+                rval['mean_' + k] = np.mean(rval[k])
+                rval['std_' + k] = np.std(rval[k])
+            if k.endswith('time'):
+                rval.pop(k)
+        rval = pd.DataFrame(rval)
+        rval = rval[sorted(rval.columns)]
+        rval.to_csv(path_or_buf=outfile_result, sep='\t',
+                    header=True, index=False)
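The nested_cv path above is the standard "searcher inside cross_validate" pattern: the outer splitter treats the inner GridSearchCV as a single estimator, so every outer fold triggers a full inner search. In miniature, with plain sklearn pieces (the `else:` branch that follows resumes the non-nested paths)::

    from sklearn.datasets import load_iris
    from sklearn.model_selection import GridSearchCV, cross_validate
    from sklearn.svm import SVC

    X, y = load_iris(return_X_y=True)

    # inner loop tunes C; outer loop gives an unbiased score of the tuned model
    inner = GridSearchCV(SVC(), {'C': [0.1, 1.0, 10.0]}, cv=3)
    scores = cross_validate(inner, X, y, cv=5, scoring='accuracy')
    print(scores['test_score'].mean())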
     else:
-        warnings.simplefilter('always', FitFailedWarning)
-        with warnings.catch_warnings(record=True) as w:
-            try:
-                searcher.fit(X, y, groups=groups)
-            except ValueError:
-                pass
-            for warning in w:
-                print(repr(warning.message))
+        if split_mode == 'train_test_split':
+            train_test_split = try_get_attr(
+                'galaxy_ml.model_validations', 'train_test_split')
+            # make sure refit is chosen
+            # this could be True for sklearn models, but not the case for
+            # deep learning models
+            if not options['refit'] and \
+                    not all(hasattr(estimator, attr)
+                            for attr in ('config', 'model_type')):
+                warnings.warn("Refit is changed to `True` for nested "
+                              "validation!")
+                setattr(searcher, 'refit', True)
+            split_options = params['outer_split']

-    if do_train_test_split == 'no':
-        # save results
-        cv_results = pandas.DataFrame(searcher.cv_results_)
-        cv_results = cv_results[sorted(cv_results.columns)]
-        cv_results.to_csv(path_or_buf=outfile_result, sep='\t',
-                          header=True, index=False)
+            # splits
+            if split_options['shuffle'] == 'stratified':
+                split_options['labels'] = y
+                X, X_test, y, y_test = train_test_split(X, y,
+                                                        **split_options)
+            elif split_options['shuffle'] == 'group':
+                if groups is None:
+                    raise ValueError("No group based CV option was "
+                                     "chosen for group shuffle!")
+                split_options['labels'] = groups
+                if y is None:
+                    X, X_test, groups, _ =\
+                        train_test_split(X, groups, **split_options)
+                else:
+                    X, X_test, y, y_test, groups, _ =\
+                        train_test_split(X, y, groups, **split_options)
+            else:
+                if split_options['shuffle'] == 'None':
+                    split_options['shuffle'] = None
+                X, X_test, y, y_test =\
+                    train_test_split(X, y, **split_options)
+        # end train_test_split

-        # output test result using best_estimator_
-        else:
-            best_estimator_ = searcher.best_estimator_
-            if isinstance(options['scoring'], collections.Mapping):
-                is_multimetric = True
+        # shared by both train_test_split and non-split
+        if options['error_score'] == 'raise':
+            searcher.fit(X, y, groups=groups)
         else:
-                is_multimetric = False
+            warnings.simplefilter('always', FitFailedWarning)
+            with warnings.catch_warnings(record=True) as w:
+                try:
+                    searcher.fit(X, y, groups=groups)
+                except ValueError:
+                    pass
+                for warning in w:
+                    print(repr(warning.message))
+
+        # no outer split
+        if split_mode == 'no':
+            # save results
+            cv_results = pd.DataFrame(searcher.cv_results_)
+            cv_results = cv_results[sorted(cv_results.columns)]
+            cv_results.to_csv(path_or_buf=outfile_result, sep='\t',
+                              header=True, index=False)

-            test_score = _score(best_estimator_, X_test,
-                                y_test, options['scoring'],
-                                is_multimetric=is_multimetric)
-            if not is_multimetric:
-                test_score = {primary_scoring: test_score}
-            for key, value in test_score.items():
-                test_score[key] = [value]
-            result_df = pandas.DataFrame(test_score)
-            result_df.to_csv(path_or_buf=outfile_result, sep='\t',
-                             header=True, index=False)
+        # train_test_split, output test result using best_estimator_
+        # or rebuild the trained estimator using weights if applicable.
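The `else:` branch just below does what the comment says: score best_estimator_ on the held-out split, or call a Keras batch classifier's own evaluate. Reduced to the sklearn essentials, with a single metric and toy data::

    from sklearn.datasets import make_classification
    from sklearn.model_selection import GridSearchCV, train_test_split
    from sklearn.svm import SVC

    X, y = make_classification(random_state=0)
    X, X_test, y, y_test = train_test_split(X, y, random_state=0)

    searcher = GridSearchCV(SVC(), {'C': [0.1, 1.0]}, cv=3, refit=True)
    searcher.fit(X, y)

    # refit=True retrains best_estimator_ on the full training split;
    # its held-out score is what ends up in the result table
    print(searcher.best_estimator_.score(X_test, y_test))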
+ else: + scorer_ = searcher.scorer_ + if isinstance(scorer_, collections.Mapping): + is_multimetric = True + else: + is_multimetric = False + + best_estimator_ = getattr(searcher, 'best_estimator_', None) + if not best_estimator_: + raise ValueError("GridSearchCV object has no " + "`best_estimator_` when `refit`=False!") + + if best_estimator_.__class__.__name__ == 'KerasGBatchClassifier' \ + and hasattr(estimator.data_batch_generator, 'target_path'): + test_score = best_estimator_.evaluate( + X_test, scorer=scorer_, is_multimetric=is_multimetric) + else: + test_score = _score(best_estimator_, X_test, + y_test, scorer_, + is_multimetric=is_multimetric) + + if not is_multimetric: + test_score = {primary_scoring: test_score} + for key, value in test_score.items(): + test_score[key] = [value] + result_df = pd.DataFrame(test_score) + result_df.to_csv(path_or_buf=outfile_result, sep='\t', + header=True, index=False) memory.clear(warn=False) if outfile_object: + best_estimator_ = getattr(searcher, 'best_estimator_', None) + if not best_estimator_: + warnings.warn("GridSearchCV object has no attribute " + "'best_estimator_', because either it's " + "nested gridsearch or `refit` is False!") + return + + main_est = best_estimator_ + if isinstance(best_estimator_, pipeline.Pipeline): + main_est = best_estimator_.steps[-1][-1] + + if hasattr(main_est, 'model_') \ + and hasattr(main_est, 'save_weights'): + if outfile_weights: + main_est.save_weights(outfile_weights) + del main_est.model_ + del main_est.fit_params + del main_est.model_class_ + del main_est.validation_data + if getattr(main_est, 'data_generator_', None): + del main_est.data_generator_ + del main_est.data_batch_generator + with open(outfile_object, 'wb') as output_handler: - pickle.dump(searcher, output_handler, pickle.HIGHEST_PROTOCOL) + pickle.dump(best_estimator_, output_handler, + pickle.HIGHEST_PROTOCOL) if __name__ == '__main__': @@ -356,11 +555,18 @@ aparser.add_argument("-e", "--estimator", dest="infile_estimator") aparser.add_argument("-X", "--infile1", dest="infile1") aparser.add_argument("-y", "--infile2", dest="infile2") - aparser.add_argument("-r", "--outfile_result", dest="outfile_result") + aparser.add_argument("-O", "--outfile_result", dest="outfile_result") aparser.add_argument("-o", "--outfile_object", dest="outfile_object") + aparser.add_argument("-w", "--outfile_weights", dest="outfile_weights") aparser.add_argument("-g", "--groups", dest="groups") + aparser.add_argument("-r", "--ref_seq", dest="ref_seq") + aparser.add_argument("-b", "--intervals", dest="intervals") + aparser.add_argument("-t", "--targets", dest="targets") + aparser.add_argument("-f", "--fasta_path", dest="fasta_path") args = aparser.parse_args() main(args.inputs, args.infile_estimator, args.infile1, args.infile2, args.outfile_result, outfile_object=args.outfile_object, - groups=args.groups) + outfile_weights=args.outfile_weights, groups=args.groups, + ref_seq=args.ref_seq, intervals=args.intervals, + targets=args.targets, fasta_path=args.fasta_path) diff -r 21d3e08b1a48 -r 82b6104d4682 search_model_validation.xml --- a/search_model_validation.xml Tue Jul 09 19:26:54 2019 -0400 +++ b/search_model_validation.xml Fri Aug 09 07:12:16 2019 -0400 @@ -6,20 +6,37 @@ echo "@VERSION@" - + '${target_file.element_identifier}.gz' && + tabix -p bed '${target_file.element_identifier}.gz' && + #end if python '$__tool_directory__/search_model_validation.py' --inputs '$inputs' --estimator '$search_schemes.infile_estimator' + #if $input_options.selected_input == 
                'seq_fasta'
+                    --fasta_path '$input_options.fasta_path'
+                #elif $input_options.selected_input == 'refseq_and_interval'
+                    --ref_seq '$input_options.ref_genome_file'
+                    --interval '$input_options.interval_file'
+                    --targets "`pwd`/${target_file.element_identifier}.gz"
+                #else
                     --infile1 '$input_options.infile1'
+                #end if
                     --infile2 '$input_options.infile2'
-                    --outfile_result '$outfile_result'
-                    #if $save
+                    --outfile_result "`pwd`/tmp_outfile_result"
+                #if $save != 'nope'
                     --outfile_object '$outfile_object'
                     #end if
+                #if $save == 'save_weights'
+                    --outfile_weights '$outfile_weights'
+                #end if
                     #if $search_schemes.options.cv_selector.selected_cv in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut']
-                    --groups '$inputs,$search_schemes.options.cv_selector.groups_selector.infile_g'
+                    --groups '$search_schemes.options.cv_selector.groups_selector.infile_g'
                     #end if
+                >'$outfile_result' && cp tmp_outfile_result '$outfile_result';
        ]]>

[The XML markup of the remaining hunks in search_model_validation.xml (@@ -47,17 +64,17 @@, @@ -66,12 +83,23 @@, @@ -227,6 +255,7 @@, @@ -331,6 +360,7 @@, @@ -509,7 +539,7 @@ and @@ -593,7 +623,7 @@) was stripped during extraction and is not recoverable; the surviving fragments show reworked input parameters, new filters on the 'save' option ("save != 'nope'" for the searchCV object output, "save == 'save_weights'" for the weights output), and updated test lines. The same stripping removed the angle-bracketed <function ...> defaults from the help text below, which is why several entries read "score_func=)", along with the opening entries of the help-text hunk; the last removed entry, old entry 8, is restored here so the list reads through.]

-    8 sklearn_feature_selection.SelectKBest(k=10, score_func=
) - 9 sklearn_feature_selection.GenericUnivariateSelect(mode='percentile', param=1e-05, score_func=) - 10 sklearn_feature_selection.SelectPercentile(percentile=10, score_func=) - 11 sklearn_feature_selection.SelectFpr(alpha=0.05, score_func=) - 12 sklearn_feature_selection.SelectFdr(alpha=0.05, score_func=) - 13 sklearn_feature_selection.SelectFwe(alpha=0.05, score_func=) - 14 sklearn_feature_selection.VarianceThreshold(threshold=0.0) - 15 sklearn_decomposition.FactorAnalysis(copy=True, iterated_power=3, max_iter=1000, n_components=None, + 2 sklearn_preprocessing.MaxAbsScaler(copy=True) + 3 sklearn_preprocessing.Normalizer(copy=True, norm='l2') + 4 sklearn_preprocessing.MinMaxScaler(copy=True, feature_range=(0, 1)) + 5 sklearn_preprocessing.PolynomialFeatures(degree=2, include_bias=True, interaction_only=False) + 6 sklearn_preprocessing.RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True, with_scaling=True) + 7 sklearn_feature_selection.SelectKBest(k=10, score_func=) + 8 sklearn_feature_selection.GenericUnivariateSelect(mode='percentile', param=1e-05, score_func=) + 9 sklearn_feature_selection.SelectPercentile(percentile=10, score_func=) + 10 sklearn_feature_selection.SelectFpr(alpha=0.05, score_func=) + 11 sklearn_feature_selection.SelectFdr(alpha=0.05, score_func=) + 12 sklearn_feature_selection.SelectFwe(alpha=0.05, score_func=) + 13 sklearn_feature_selection.VarianceThreshold(threshold=0.0) + 14 sklearn_decomposition.FactorAnalysis(copy=True, iterated_power=3, max_iter=1000, n_components=None, noise_variance_init=None, random_state=0, svd_method='randomized', tol=0.01) - 16 sklearn_decomposition.FastICA(algorithm='parallel', fun='logcosh', fun_args=None, + 15 sklearn_decomposition.FastICA(algorithm='parallel', fun='logcosh', fun_args=None, max_iter=200, n_components=None, random_state=0, tol=0.0001, w_init=None, whiten=True) - 17 sklearn_decomposition.IncrementalPCA(batch_size=None, copy=True, n_components=None, whiten=False) - 18 sklearn_decomposition.KernelPCA(alpha=1.0, coef0=1, copy_X=True, degree=3, eigen_solver='auto', + 16 sklearn_decomposition.IncrementalPCA(batch_size=None, copy=True, n_components=None, whiten=False) + 17 sklearn_decomposition.KernelPCA(alpha=1.0, coef0=1, copy_X=True, degree=3, eigen_solver='auto', fit_inverse_transform=False, gamma=None, kernel='linear', kernel_params=None, max_iter=None, n_components=None, random_state=0, remove_zero_eig=False, tol=0) - 19 sklearn_decomposition.LatentDirichletAllocation(batch_size=128, doc_topic_prior=None, evaluate_every=-1, learning_decay=0.7, + 18 sklearn_decomposition.LatentDirichletAllocation(batch_size=128, doc_topic_prior=None, evaluate_every=-1, learning_decay=0.7, learning_method=None, learning_offset=10.0, max_doc_update_iter=100, max_iter=10, mean_change_tol=0.001, n_components=10, n_topics=None, perp_tol=0.1, random_state=0, topic_word_prior=None, total_samples=1000000.0, verbose=0) - 20 sklearn_decomposition.MiniBatchDictionaryLearning(alpha=1, batch_size=3, dict_init=None, fit_algorithm='lars', + 19 sklearn_decomposition.MiniBatchDictionaryLearning(alpha=1, batch_size=3, dict_init=None, fit_algorithm='lars', n_components=None, n_iter=1000, random_state=0, shuffle=True, split_sign=False, transform_algorithm='omp', transform_alpha=None, transform_n_nonzero_coefs=None, verbose=False) - 21 sklearn_decomposition.MiniBatchSparsePCA(alpha=1, batch_size=3, callback=None, method='lars', n_components=None, + 20 sklearn_decomposition.MiniBatchSparsePCA(alpha=1, batch_size=3, callback=None, 
method='lars', n_components=None, n_iter=100, random_state=0, ridge_alpha=0.01, shuffle=True, verbose=False) - 22 sklearn_decomposition.NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200, + 21 sklearn_decomposition.NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200, n_components=None, random_state=0, shuffle=False, solver='cd', tol=0.0001, verbose=0) - 23 sklearn_decomposition.PCA(copy=True, iterated_power='auto', n_components=None, random_state=0, svd_solver='auto', tol=0.0, whiten=False) - 24 sklearn_decomposition.SparsePCA(U_init=None, V_init=None, alpha=1, max_iter=1000, method='lars', + 22 sklearn_decomposition.PCA(copy=True, iterated_power='auto', n_components=None, random_state=0, svd_solver='auto', tol=0.0, whiten=False) + 23 sklearn_decomposition.SparsePCA(U_init=None, V_init=None, alpha=1, max_iter=1000, method='lars', n_components=None, random_state=0, ridge_alpha=0.01, tol=1e-08, verbose=False) - 25 sklearn_decomposition.TruncatedSVD(algorithm='randomized', n_components=2, n_iter=5, random_state=0, tol=0.0) - 26 sklearn_kernel_approximation.Nystroem(coef0=None, degree=None, gamma=None, kernel='rbf', + 24 sklearn_decomposition.TruncatedSVD(algorithm='randomized', n_components=2, n_iter=5, random_state=0, tol=0.0) + 25 sklearn_kernel_approximation.Nystroem(coef0=None, degree=None, gamma=None, kernel='rbf', kernel_params=None, n_components=100, random_state=0) - 27 sklearn_kernel_approximation.RBFSampler(gamma=1.0, n_components=100, random_state=0) - 28 sklearn_kernel_approximation.AdditiveChi2Sampler(sample_interval=None, sample_steps=2) - 29 sklearn_kernel_approximation.SkewedChi2Sampler(n_components=100, random_state=0, skewedness=1.0) - 30 sklearn_cluster.FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto', connectivity=None, + 26 sklearn_kernel_approximation.RBFSampler(gamma=1.0, n_components=100, random_state=0) + 27 sklearn_kernel_approximation.AdditiveChi2Sampler(sample_interval=None, sample_steps=2) + 28 sklearn_kernel_approximation.SkewedChi2Sampler(n_components=100, random_state=0, skewedness=1.0) + 29 sklearn_cluster.FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto', connectivity=None, linkage='ward', memory=None, n_clusters=2, pooling_func=) - 31 skrebate_ReliefF(discrete_threshold=10, n_features_to_select=10, n_neighbors=100, verbose=False) - 32 skrebate_SURF(discrete_threshold=10, n_features_to_select=10, verbose=False) - 33 skrebate_SURFstar(discrete_threshold=10, n_features_to_select=10, verbose=False) - 34 skrebate_MultiSURF(discrete_threshold=10, n_features_to_select=10, verbose=False) - 35 skrebate_MultiSURFstar(discrete_threshold=10, n_features_to_select=10, verbose=False) - 'sk_prep_all': All sklearn preprocessing estimators, i.e., 0-7 - 'fs_all': All feature_selection estimators, i.e., 8-14 - 'decomp_all': All decomposition estimators, i.e., 15-25 - 'k_appr_all': All kernel_approximation estimators, i.e., 26-29 - 'reb_all': All skrebate estimators, i.e., 31-35 - 'all_0': All except the imbalanced-learn samplers, i.e., 0-35 - 'imb_all': All imbalanced-learn sampling methods, i.e., 36-54. 
+ 30 skrebate_ReliefF(discrete_threshold=10, n_features_to_select=10, n_neighbors=100, verbose=False) + 31 skrebate_SURF(discrete_threshold=10, n_features_to_select=10, verbose=False) + 32 skrebate_SURFstar(discrete_threshold=10, n_features_to_select=10, verbose=False) + 33 skrebate_MultiSURF(discrete_threshold=10, n_features_to_select=10, verbose=False) + 34 skrebate_MultiSURFstar(discrete_threshold=10, n_features_to_select=10, verbose=False) + 'sk_prep_all': All sklearn preprocessing estimators, i.e., 0-6 + 'fs_all': All feature_selection estimators, i.e., 7-13 + 'decomp_all': All decomposition estimators, i.e., 14-24 + 'k_appr_all': All kernel_approximation estimators, i.e., 25-28 + 'reb_all': All skrebate estimators, i.e., 30-34 + 'all_0': All except the imbalanced-learn samplers, i.e., 0-34 + 'imb_all': All imbalanced-learn sampling methods, i.e., 35-53. **CAUTION**: Mix of imblearn and other preprocessors may not work. None: opt out of preprocessor Support mix (CAUTION: Mix of imblearn and other preprocessors may not work), e.g.:: - : [None, 'sk_prep_all', 22, 'k_appr_all', sklearn_feature_selection.SelectKBest(k=50)] + : [None, 'sk_prep_all', 21, 'k_appr_all', sklearn_feature_selection.SelectKBest(k=50)] diff -r 21d3e08b1a48 -r 82b6104d4682 stacking_ensembles.py --- a/stacking_ensembles.py Tue Jul 09 19:26:54 2019 -0400 +++ b/stacking_ensembles.py Fri Aug 09 07:12:16 2019 -0400 @@ -1,26 +1,17 @@ import argparse +import ast import json +import mlxtend.regressor +import mlxtend.classifier import pandas as pd import pickle -import xgboost +import sklearn +import sys import warnings -from sklearn import (cluster, compose, decomposition, ensemble, - feature_extraction, feature_selection, - gaussian_process, kernel_approximation, metrics, - model_selection, naive_bayes, neighbors, - pipeline, preprocessing, svm, linear_model, - tree, discriminant_analysis) -from sklearn.model_selection._split import check_cv -from feature_selectors import (DyRFE, DyRFECV, - MyPipeline, MyimbPipeline) -from iraps_classifier import (IRAPSCore, IRAPSClassifier, - BinarizeTargetClassifier, - BinarizeTargetRegressor) -from preprocessors import Z_RandomOverSampler -from utils import load_model, get_cv, get_estimator, get_search_params +from sklearn import ensemble -from mlxtend.regressor import StackingCVRegressor, StackingRegressor -from mlxtend.classifier import StackingCVClassifier, StackingClassifier +from galaxy_ml.utils import (load_model, get_cv, get_estimator, + get_search_params) warnings.filterwarnings('ignore') @@ -51,6 +42,8 @@ with open(inputs_path, 'r') as param_handler: params = json.load(param_handler) + estimator_type = params['algo_selection']['estimator_type'] + # get base estimators base_estimators = [] for idx, base_file in enumerate(base_paths.split(',')): if base_file and base_file != 'None': @@ -60,14 +53,23 @@ estimator_json = (params['base_est_builder'][idx] ['estimator_selector']) model = get_estimator(estimator_json) - base_estimators.append(model) + + if estimator_type.startswith('sklearn'): + named = model.__class__.__name__.lower() + named = 'base_%d_%s' % (idx, named) + base_estimators.append((named, model)) + else: + base_estimators.append(model) - if meta_path: - with open(meta_path, 'rb') as f: - meta_estimator = load_model(f) - else: - estimator_json = params['meta_estimator']['estimator_selector'] - meta_estimator = get_estimator(estimator_json) + # get meta estimator, if applicable + if estimator_type.startswith('mlxtend'): + if meta_path: + with open(meta_path, 'rb') as f: 
+ meta_estimator = load_model(f) + else: + estimator_json = (params['algo_selection'] + ['meta_estimator']['estimator_selector']) + meta_estimator = get_estimator(estimator_json) options = params['algo_selection']['options'] @@ -78,26 +80,26 @@ # set n_jobs options['n_jobs'] = N_JOBS - if params['algo_selection']['estimator_type'] == 'StackingCVClassifier': - ensemble_estimator = StackingCVClassifier( + weights = options.pop('weights', None) + if weights: + options['weights'] = ast.literal_eval(weights) + + mod_and_name = estimator_type.split('_') + mod = sys.modules[mod_and_name[0]] + klass = getattr(mod, mod_and_name[1]) + + if estimator_type.startswith('sklearn'): + options['n_jobs'] = N_JOBS + ensemble_estimator = klass(base_estimators, **options) + + elif mod == mlxtend.classifier: + ensemble_estimator = klass( classifiers=base_estimators, meta_classifier=meta_estimator, **options) - elif params['algo_selection']['estimator_type'] == 'StackingClassifier': - ensemble_estimator = StackingClassifier( - classifiers=base_estimators, - meta_classifier=meta_estimator, - **options) - - elif params['algo_selection']['estimator_type'] == 'StackingCVRegressor': - ensemble_estimator = StackingCVRegressor( - regressors=base_estimators, - meta_regressor=meta_estimator, - **options) - else: - ensemble_estimator = StackingRegressor( + ensemble_estimator = klass( regressors=base_estimators, meta_regressor=meta_estimator, **options) diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/RandomForestClassifier.zip Binary file test-data/RandomForestClassifier.zip has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/StackingCVRegressor01.zip Binary file test-data/StackingCVRegressor01.zip has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/StackingRegressor02.zip Binary file test-data/StackingRegressor02.zip has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/StackingVoting03.zip Binary file test-data/StackingVoting03.zip has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/deepsear_1feature.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/deepsear_1feature.json Fri Aug 09 07:12:16 2019 -0400 @@ -0,0 +1,1 @@ +{"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Conv1D", "config": {"name": "conv1d_1", "trainable": true, "batch_input_shape": [null, 1000, 4], "dtype": "float32", "filters": 320, "kernel_size": [8], "strides": [1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "MaxPooling1D", "config": {"name": "max_pooling1d_1", "trainable": true, "strides": [4], "pool_size": [4], "padding": "valid", "data_format": "channels_last"}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.2, "noise_shape": null, "seed": 999}}, {"class_name": "Conv1D", "config": {"name": "conv1d_2", "trainable": true, "filters": 480, "kernel_size": [8], "strides": [1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", 
"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "MaxPooling1D", "config": {"name": "max_pooling1d_2", "trainable": true, "strides": [4], "pool_size": [4], "padding": "valid", "data_format": "channels_last"}}, {"class_name": "Dropout", "config": {"name": "dropout_2", "trainable": true, "rate": 0.2, "noise_shape": null, "seed": 999}}, {"class_name": "Conv1D", "config": {"name": "conv1d_3", "trainable": true, "filters": 960, "kernel_size": [8], "strides": [1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "rate": 0.5, "noise_shape": null, "seed": 999}}, {"class_name": "Reshape", "config": {"name": "reshape_1", "trainable": true, "target_shape": [50880]}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "keras_version": "2.2.4", "backend": "tensorflow"} diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/fitted_keras_g_regressor01.zip Binary file test-data/fitted_keras_g_regressor01.zip has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras01.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/keras01.json Fri Aug 09 07:12:16 2019 -0400 @@ -0,0 +1,1 @@ +{"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "batch_input_shape": [null, 784], "dtype": "float32", "units": 32, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "relu"}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 10, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, 
"mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_2", "trainable": true, "activation": "softmax"}}]}, "keras_version": "2.2.4", "backend": "tensorflow"} \ No newline at end of file diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras02.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/keras02.json Fri Aug 09 07:12:16 2019 -0400 @@ -0,0 +1,1 @@ +{"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "main_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 100], "dtype": "int32", "sparse": false, "name": "main_input"}, "inbound_nodes": []}, {"name": "embedding_1", "class_name": "Embedding", "config": {"name": "embedding_1", "trainable": true, "batch_input_shape": [null, 100], "dtype": "float32", "input_dim": 10000, "output_dim": 512, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 100}, "inbound_nodes": [[["main_input", 0, 0, {}]]]}, {"name": "lstm_1", "class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 32, "activation": "linear", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}, "inbound_nodes": [[["embedding_1", 0, 0, {}]]]}, {"name": "dense_1", "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["lstm_1", 0, 0, {}]]]}, {"name": "aux_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 5], "dtype": "float32", "sparse": false, "name": "aux_input"}, "inbound_nodes": []}, {"name": "concatenate_1", "class_name": "Concatenate", "config": {"name": "concatenate_1", "trainable": true, "axis": -1}, "inbound_nodes": [[["dense_1", 0, 0, {}], ["aux_input", 0, 0, {}]]]}, {"name": "dense_2", "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, 
"kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["concatenate_1", 0, 0, {}]]]}, {"name": "dense_3", "class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_2", 0, 0, {}]]]}, {"name": "dense_4", "class_name": "Dense", "config": {"name": "dense_4", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_3", 0, 0, {}]]]}, {"name": "dense_5", "class_name": "Dense", "config": {"name": "dense_5", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_4", 0, 0, {}]]]}], "input_layers": [["main_input", 0, 0], ["aux_input", 0, 0]], "output_layers": [["dense_1", 0, 0], ["dense_5", 0, 0]]}, "keras_version": "2.2.4", "backend": "tensorflow"} \ No newline at end of file diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras03.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/keras03.json Fri Aug 09 07:12:16 2019 -0400 @@ -0,0 +1,1 @@ +{"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "batch_input_shape": [null, 17], "dtype": "float32", "units": 100, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": 0}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.1, "noise_shape": null, "seed": 0}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": 0}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "keras_version": "2.2.4", "backend": "tensorflow"} \ No newline at end of file diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras04.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/keras04.json 
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras04.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/keras04.json Fri Aug 09 07:12:16 2019 -0400
@@ -0,0 +1,1 @@
+{"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "batch_input_shape": [null, 17], "dtype": "float32", "units": 32, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "linear"}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_2", "trainable": true, "activation": "linear"}}]}, "keras_version": "2.2.4", "backend": "tensorflow"}
\ No newline at end of file
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras_batch_model01
Binary file test-data/keras_batch_model01 has changed
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras_batch_model02
Binary file test-data/keras_batch_model02 has changed
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras_batch_model03
Binary file test-data/keras_batch_model03 has changed
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras_batch_params01.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/keras_batch_params01.tabular Fri Aug 09 07:12:16 2019 -0400
@@ -0,0 +1,94 @@
+ Parameter Value
+@ amsgrad amsgrad: None
+@ batch_size batch_size: 32
+@ beta_1 beta_1: None
+@ beta_2 beta_2: None
+@ callbacks callbacks: [{'callback_selection': {'callback_type': 'None'}}]
+@ class_positive_factor class_positive_factor: 1.0
+@ config config: {'name': 'sequential_1', 'layers': [{'class_name': 'Dense', 'config': {'name': 'dense_1', 'trainable
+@ data_batch_generator "data_batch_generator: FastaDNABatchGenerator(fasta_path='to_be_determined', seed=999,
+ seq_length=1000, shuffle=True)"
+@ decay decay: 0.0
+@ epochs epochs: 100
+@ epsilon epsilon: None
+@ layers_0_Dense layers_0_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_1', 'trainable': True, 'batch_input_shape': [None,
+@ layers_1_Activation layers_1_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_1', 'trainable': True, 'activation': 're
+@ layers_2_Dense layers_2_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_2', 'trainable': True, 'units': 10, 'activation':
+@ layers_3_Activation layers_3_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_2', 'trainable': True, 'activation': 'so
+@ loss loss: 'binary_crossentropy'
+@ lr lr: 0.01
+@ metrics metrics: ['acc']
+@ model_type model_type: 'sequential'
+@ momentum momentum: 0.0
+* n_jobs n_jobs: 1
+@ nesterov nesterov: False
+@ optimizer optimizer: 'sgd'
+@ prediction_steps prediction_steps: None
+@ rho rho: None
+@ schedule_decay schedule_decay: None
+@ seed seed: None
+@ steps_per_epoch steps_per_epoch: None
+@ validation_data validation_data: None
+@ validation_steps validation_steps: None
+@ verbose verbose: 0
+* data_batch_generator__fasta_path data_batch_generator__fasta_path: 'to_be_determined'
+@ data_batch_generator__seed data_batch_generator__seed: 999
+@ data_batch_generator__seq_length data_batch_generator__seq_length: 1000
+@ data_batch_generator__shuffle data_batch_generator__shuffle: True
+* layers_0_Dense__class_name layers_0_Dense__class_name: 'Dense'
+@ layers_0_Dense__config layers_0_Dense__config: {'name': 'dense_1', 'trainable': True, 'batch_input_shape': [None, 784], 'dtype': 'float32', 'units'
+@ layers_0_Dense__config__activation layers_0_Dense__config__activation: 'linear'
+@ layers_0_Dense__config__activity_regularizer layers_0_Dense__config__activity_regularizer: None
+@ layers_0_Dense__config__batch_input_shape layers_0_Dense__config__batch_input_shape: [None, 784]
+@ layers_0_Dense__config__bias_constraint layers_0_Dense__config__bias_constraint: None
+@ layers_0_Dense__config__bias_initializer layers_0_Dense__config__bias_initializer: {'class_name': 'Zeros', 'config': {}}
+* layers_0_Dense__config__bias_initializer__class_name layers_0_Dense__config__bias_initializer__class_name: 'Zeros'
+@ layers_0_Dense__config__bias_initializer__config layers_0_Dense__config__bias_initializer__config: {}
+@ layers_0_Dense__config__bias_regularizer layers_0_Dense__config__bias_regularizer: None
+@ layers_0_Dense__config__dtype layers_0_Dense__config__dtype: 'float32'
+@ layers_0_Dense__config__kernel_constraint layers_0_Dense__config__kernel_constraint: None
+@ layers_0_Dense__config__kernel_initializer layers_0_Dense__config__kernel_initializer: {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'unifo
+* layers_0_Dense__config__kernel_initializer__class_name layers_0_Dense__config__kernel_initializer__class_name: 'VarianceScaling'
+@ layers_0_Dense__config__kernel_initializer__config layers_0_Dense__config__kernel_initializer__config: {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'uniform', 'seed': None}
+@ layers_0_Dense__config__kernel_initializer__config__distribution layers_0_Dense__config__kernel_initializer__config__distribution: 'uniform'
+@ layers_0_Dense__config__kernel_initializer__config__mode layers_0_Dense__config__kernel_initializer__config__mode: 'fan_avg'
+@ layers_0_Dense__config__kernel_initializer__config__scale layers_0_Dense__config__kernel_initializer__config__scale: 1.0
+@ layers_0_Dense__config__kernel_initializer__config__seed layers_0_Dense__config__kernel_initializer__config__seed: None
+@ layers_0_Dense__config__kernel_regularizer layers_0_Dense__config__kernel_regularizer: None
+* layers_0_Dense__config__name layers_0_Dense__config__name: 'dense_1'
+@ layers_0_Dense__config__trainable layers_0_Dense__config__trainable: True
+@ layers_0_Dense__config__units layers_0_Dense__config__units: 32
+@ layers_0_Dense__config__use_bias layers_0_Dense__config__use_bias: True
+* layers_1_Activation__class_name layers_1_Activation__class_name: 'Activation'
+@ layers_1_Activation__config layers_1_Activation__config: {'name': 'activation_1', 'trainable': True, 'activation': 'relu'}
+@ layers_1_Activation__config__activation layers_1_Activation__config__activation: 'relu'
+* layers_1_Activation__config__name layers_1_Activation__config__name: 'activation_1'
+@ layers_1_Activation__config__trainable layers_1_Activation__config__trainable: True
+* layers_2_Dense__class_name layers_2_Dense__class_name: 'Dense'
+@ layers_2_Dense__config layers_2_Dense__config: {'name': 'dense_2', 'trainable': True, 'units': 10, 'activation': 'linear', 'use_bias': True, 'kerne
+@ layers_2_Dense__config__activation layers_2_Dense__config__activation: 'linear'
+@ layers_2_Dense__config__activity_regularizer layers_2_Dense__config__activity_regularizer: None
+@ layers_2_Dense__config__bias_constraint layers_2_Dense__config__bias_constraint: None
+@ layers_2_Dense__config__bias_initializer layers_2_Dense__config__bias_initializer: {'class_name': 'Zeros', 'config': {}}
+* layers_2_Dense__config__bias_initializer__class_name layers_2_Dense__config__bias_initializer__class_name: 'Zeros'
+@ layers_2_Dense__config__bias_initializer__config layers_2_Dense__config__bias_initializer__config: {}
+@ layers_2_Dense__config__bias_regularizer layers_2_Dense__config__bias_regularizer: None
+@ layers_2_Dense__config__kernel_constraint layers_2_Dense__config__kernel_constraint: None
+@ layers_2_Dense__config__kernel_initializer layers_2_Dense__config__kernel_initializer: {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'unifo
+* layers_2_Dense__config__kernel_initializer__class_name layers_2_Dense__config__kernel_initializer__class_name: 'VarianceScaling'
+@ layers_2_Dense__config__kernel_initializer__config layers_2_Dense__config__kernel_initializer__config: {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'uniform', 'seed': None}
+@ layers_2_Dense__config__kernel_initializer__config__distribution layers_2_Dense__config__kernel_initializer__config__distribution: 'uniform'
+@ layers_2_Dense__config__kernel_initializer__config__mode layers_2_Dense__config__kernel_initializer__config__mode: 'fan_avg'
+@ layers_2_Dense__config__kernel_initializer__config__scale layers_2_Dense__config__kernel_initializer__config__scale: 1.0
+@ layers_2_Dense__config__kernel_initializer__config__seed layers_2_Dense__config__kernel_initializer__config__seed: None
+@ layers_2_Dense__config__kernel_regularizer layers_2_Dense__config__kernel_regularizer: None
+* layers_2_Dense__config__name layers_2_Dense__config__name: 'dense_2'
+@ layers_2_Dense__config__trainable layers_2_Dense__config__trainable: True
+@ layers_2_Dense__config__units layers_2_Dense__config__units: 10
+@ layers_2_Dense__config__use_bias layers_2_Dense__config__use_bias: True
+* layers_3_Activation__class_name layers_3_Activation__class_name: 'Activation'
+@ layers_3_Activation__config layers_3_Activation__config: {'name': 'activation_2', 'trainable': True, 'activation': 'softmax'}
+@ layers_3_Activation__config__activation layers_3_Activation__config__activation: 'softmax'
+* layers_3_Activation__config__name layers_3_Activation__config__name: 'activation_2'
+@ layers_3_Activation__config__trainable layers_3_Activation__config__trainable: True
+ Note: @, params eligible for search in searchcv tool.
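In the table above, the Parameter column uses scikit-learn's double-underscore convention to address nested parameters (layers_0_Dense__config__units, for example, reaches into layer 0's config dict), and the leading marker records searchability: rows flagged @ are eligible for hyperparameter search in the searchcv tool, while rows flagged * are structural and stay fixed. A minimal sketch of the same addressing on a plain scikit-learn pipeline (the step names 'scale' and 'clf' are hypothetical stand-ins, not names used by these tools):

    # Sketch: double-underscore parameter addressing, as in the table keys.
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC

    pipe = Pipeline([('scale', StandardScaler()), ('clf', SVC())])

    # set_params resolves nested names exactly like the Parameter column:
    pipe.set_params(clf__C=10.0)

    # the same keys define the search space a searchcv-style tool sweeps ('@' rows):
    search = GridSearchCV(pipe, param_grid={'clf__C': [0.1, 1.0, 10.0]}, cv=3)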
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras_model01
Binary file test-data/keras_model01 has changed
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras_model02
Binary file test-data/keras_model02 has changed
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras_model04
Binary file test-data/keras_model04 has changed
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras_params04.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/keras_params04.tabular Fri Aug 09 07:12:16 2019 -0400
@@ -0,0 +1,85 @@
+ Parameter Value
+@ amsgrad amsgrad: False
+@ batch_size batch_size: 32
+@ beta_1 beta_1: 0.9
+@ beta_2 beta_2: 0.999
+@ callbacks callbacks: [{'callback_selection': {'callback_type': 'None'}}]
+@ config config: {'name': 'sequential_1', 'layers': [{'class_name': 'Dense', 'config': {'name': 'dense_1', 'trainable
+@ decay decay: 0.0
+@ epochs epochs: 100
+@ epsilon epsilon: None
+@ layers_0_Dense layers_0_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_1', 'trainable': True, 'batch_input_shape': [None,
+@ layers_1_Activation layers_1_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_1', 'trainable': True, 'activation': 'li
+@ layers_2_Dense layers_2_Dense: {'class_name': 'Dense', 'config': {'name': 'dense_2', 'trainable': True, 'units': 1, 'activation': '
+@ layers_3_Activation layers_3_Activation: {'class_name': 'Activation', 'config': {'name': 'activation_2', 'trainable': True, 'activation': 'li
+@ loss loss: 'mean_squared_error'
+@ lr lr: 0.001
+@ metrics metrics: ['mse']
+@ model_type model_type: 'sequential'
+@ momentum momentum: None
+@ nesterov nesterov: None
+@ optimizer optimizer: 'adam'
+@ rho rho: None
+@ schedule_decay schedule_decay: None
+@ seed seed: 42
+@ steps_per_epoch steps_per_epoch: None
+@ validation_data validation_data: None
+@ validation_steps validation_steps: None
+@ verbose verbose: 0
+* layers_0_Dense__class_name layers_0_Dense__class_name: 'Dense'
+@ layers_0_Dense__config layers_0_Dense__config: {'name': 'dense_1', 'trainable': True, 'batch_input_shape': [None, 17], 'dtype': 'float32', 'units':
+@ layers_0_Dense__config__activation layers_0_Dense__config__activation: 'linear'
+@ layers_0_Dense__config__activity_regularizer layers_0_Dense__config__activity_regularizer: None
+@ layers_0_Dense__config__batch_input_shape layers_0_Dense__config__batch_input_shape: [None, 17]
+@ layers_0_Dense__config__bias_constraint layers_0_Dense__config__bias_constraint: None
+@ layers_0_Dense__config__bias_initializer layers_0_Dense__config__bias_initializer: {'class_name': 'Zeros', 'config': {}}
+* layers_0_Dense__config__bias_initializer__class_name layers_0_Dense__config__bias_initializer__class_name: 'Zeros'
+@ layers_0_Dense__config__bias_initializer__config layers_0_Dense__config__bias_initializer__config: {}
+@ layers_0_Dense__config__bias_regularizer layers_0_Dense__config__bias_regularizer: None
+@ layers_0_Dense__config__dtype layers_0_Dense__config__dtype: 'float32'
+@ layers_0_Dense__config__kernel_constraint layers_0_Dense__config__kernel_constraint: None
+@ layers_0_Dense__config__kernel_initializer layers_0_Dense__config__kernel_initializer: {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'unifo
+* layers_0_Dense__config__kernel_initializer__class_name layers_0_Dense__config__kernel_initializer__class_name: 'VarianceScaling'
+@ layers_0_Dense__config__kernel_initializer__config layers_0_Dense__config__kernel_initializer__config: {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'uniform', 'seed': None}
+@ layers_0_Dense__config__kernel_initializer__config__distribution layers_0_Dense__config__kernel_initializer__config__distribution: 'uniform'
+@ layers_0_Dense__config__kernel_initializer__config__mode layers_0_Dense__config__kernel_initializer__config__mode: 'fan_avg'
+@ layers_0_Dense__config__kernel_initializer__config__scale layers_0_Dense__config__kernel_initializer__config__scale: 1.0
+@ layers_0_Dense__config__kernel_initializer__config__seed layers_0_Dense__config__kernel_initializer__config__seed: None
+@ layers_0_Dense__config__kernel_regularizer layers_0_Dense__config__kernel_regularizer: None
+* layers_0_Dense__config__name layers_0_Dense__config__name: 'dense_1'
+@ layers_0_Dense__config__trainable layers_0_Dense__config__trainable: True
+@ layers_0_Dense__config__units layers_0_Dense__config__units: 32
+@ layers_0_Dense__config__use_bias layers_0_Dense__config__use_bias: True
+* layers_1_Activation__class_name layers_1_Activation__class_name: 'Activation'
+@ layers_1_Activation__config layers_1_Activation__config: {'name': 'activation_1', 'trainable': True, 'activation': 'linear'}
+@ layers_1_Activation__config__activation layers_1_Activation__config__activation: 'linear'
+* layers_1_Activation__config__name layers_1_Activation__config__name: 'activation_1'
+@ layers_1_Activation__config__trainable layers_1_Activation__config__trainable: True
+* layers_2_Dense__class_name layers_2_Dense__class_name: 'Dense'
+@ layers_2_Dense__config layers_2_Dense__config: {'name': 'dense_2', 'trainable': True, 'units': 1, 'activation': 'linear', 'use_bias': True, 'kernel
+@ layers_2_Dense__config__activation layers_2_Dense__config__activation: 'linear'
+@ layers_2_Dense__config__activity_regularizer layers_2_Dense__config__activity_regularizer: None
+@ layers_2_Dense__config__bias_constraint layers_2_Dense__config__bias_constraint: None
+@ layers_2_Dense__config__bias_initializer layers_2_Dense__config__bias_initializer: {'class_name': 'Zeros', 'config': {}}
+* layers_2_Dense__config__bias_initializer__class_name layers_2_Dense__config__bias_initializer__class_name: 'Zeros'
+@ layers_2_Dense__config__bias_initializer__config layers_2_Dense__config__bias_initializer__config: {}
+@ layers_2_Dense__config__bias_regularizer layers_2_Dense__config__bias_regularizer: None
+@ layers_2_Dense__config__kernel_constraint layers_2_Dense__config__kernel_constraint: None
+@ layers_2_Dense__config__kernel_initializer layers_2_Dense__config__kernel_initializer: {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'unifo
+* layers_2_Dense__config__kernel_initializer__class_name layers_2_Dense__config__kernel_initializer__class_name: 'VarianceScaling'
+@ layers_2_Dense__config__kernel_initializer__config layers_2_Dense__config__kernel_initializer__config: {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'uniform', 'seed': None}
+@ layers_2_Dense__config__kernel_initializer__config__distribution layers_2_Dense__config__kernel_initializer__config__distribution: 'uniform'
+@ layers_2_Dense__config__kernel_initializer__config__mode layers_2_Dense__config__kernel_initializer__config__mode: 'fan_avg'
+@ layers_2_Dense__config__kernel_initializer__config__scale layers_2_Dense__config__kernel_initializer__config__scale: 1.0
+@ layers_2_Dense__config__kernel_initializer__config__seed layers_2_Dense__config__kernel_initializer__config__seed: None
+@ layers_2_Dense__config__kernel_regularizer layers_2_Dense__config__kernel_regularizer:
None +* layers_2_Dense__config__name layers_2_Dense__config__name: 'dense_2' +@ layers_2_Dense__config__trainable layers_2_Dense__config__trainable: True +@ layers_2_Dense__config__units layers_2_Dense__config__units: 1 +@ layers_2_Dense__config__use_bias layers_2_Dense__config__use_bias: True +* layers_3_Activation__class_name layers_3_Activation__class_name: 'Activation' +@ layers_3_Activation__config layers_3_Activation__config: {'name': 'activation_2', 'trainable': True, 'activation': 'linear'} +@ layers_3_Activation__config__activation layers_3_Activation__config__activation: 'linear' +* layers_3_Activation__config__name layers_3_Activation__config__name: 'activation_2' +@ layers_3_Activation__config__trainable layers_3_Activation__config__trainable: True + Note: @, params eligible for search in searchcv tool. diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras_prefitted01.zip Binary file test-data/keras_prefitted01.zip has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/keras_save_weights01.h5 Binary file test-data/keras_save_weights01.h5 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/model_pred01.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/model_pred01.tabular Fri Aug 09 07:12:16 2019 -0400 @@ -0,0 +1,262 @@ +Predicted +71.129364 +60.96111 +77.885765 +57.212738 +51.806957 +52.089592 +51.571884 +80.762184 +36.772987 +41.643093 +46.386948 +77.97063 +72.768776 +40.0386 +79.81385 +74.40216 +52.089592 +75.51107 +55.705868 +39.944202 +49.643826 +59.17941 +69.848915 +64.62096 +48.310116 +43.391766 +68.25893 +60.198105 +65.16974 +72.130005 +56.351482 +53.20132 +56.86578 +54.342987 +43.521133 +59.663773 +66.097626 +51.960022 +41.559486 +45.16049 +66.40008 +71.488754 +45.16049 +63.34996 +69.83631 +55.652687 +61.311596 +71.85501 +75.12588 +54.93247 +70.09855 +74.20223 +57.898273 +55.23022 +75.70524 +66.94729 +65.12762 +59.3189 +61.22922 +61.2382 +54.017147 +51.633373 +51.633373 +65.16974 +65.16873 +57.874527 +59.740753 +43.990814 +66.06423 +64.436615 +41.245773 +63.278465 +63.27533 +71.13793 +65.47819 +72.620995 +62.598015 +36.986706 +73.2002 +71.966644 +72.912926 +75.46711 +55.12616 +46.19641 +87.20736 +72.11753 +57.952766 +84.67858 +69.21688 +64.257095 +43.59384 +44.723145 +67.051605 +50.021965 +69.202095 +75.10072 +70.80699 +83.08025 +69.62026 +42.441116 +64.38655 +59.430386 +69.366035 +73.87479 +59.973484 +75.76153 +56.195892 +71.16636 +60.419106 +61.630756 +51.81593 +54.924137 +60.73048 +78.496635 +77.921555 +73.66453 +60.904953 +71.26717 +72.01454 +53.52841 +46.66952 +54.504898 +56.28563 +59.398067 +72.71433 +51.745968 +67.80466 +51.571823 +52.010742 +54.19355 +74.193825 +64.57627 +67.48214 +68.41867 +82.102806 +55.8638 +76.90198 +62.577324 +73.70229 +78.93923 +73.51925 +54.81887 +65.2422 +59.700085 +84.08965 +64.35592 +54.001873 +41.397793 +64.64837 +62.784557 +42.990005 +45.430832 +52.089592 +60.374348 +51.67288 +62.4257 +79.536285 +76.4169 +55.978775 +74.43581 +76.89248 +65.3203 +72.10233 +59.23278 +51.736633 +73.13266 +59.45746 +73.0939 +70.58273 +53.08009 +49.893116 +73.89228 +52.64392 +54.801548 +63.534626 +68.1002 +63.70472 +63.8851 +63.268097 +62.438057 +61.989746 +71.47914 +73.92875 +48.089043 +54.874943 +50.261494 +69.11724 +57.448387 +50.528027 +58.67657 +73.969376 +53.745205 +74.81751 +85.582954 +75.10767 +48.855537 +70.66616 +41.341694 +48.55276 +63.48302 +73.02358 +69.50546 +55.603634 +74.26824 +76.03213 +62.601646 +81.99045 +59.26651 +44.504597 +53.54178 +55.247334 +82.123795 +51.84111 +66.27524 +66.23033 +58.565033 +67.452 
+72.54107 +49.840427 +70.26608 +62.447872 +67.045 +42.600086 +64.88309 +55.31232 +39.07865 +71.81975 +59.447086 +53.20132 +75.12621 +72.9902 +53.1043 +72.42816 +72.10233 +55.836628 +53.2467 +74.670074 +74.5721 +54.103737 +49.212822 +67.238785 +60.09495 +74.5011 +63.0043 +67.7362 +53.029213 +74.860016 +78.597946 +75.369064 +60.000134 +68.83947 +40.24504 +81.21449 +61.465557 +42.74572 +52.089592 +73.162025 +52.033802 +79.690926 +62.542553 +59.557045 diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/model_pred02.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/model_pred02.tabular Fri Aug 09 07:12:16 2019 -0400 @@ -0,0 +1,262 @@ +Predicted +71.08584 +61.23427 +75.80197 +66.19323 +52.35754 +52.987312 +51.777576 +75.66966 +49.61427 +51.20531 +49.255173 +76.143936 +74.00767 +50.80104 +72.37281 +68.69481 +52.816956 +76.27541 +57.82054 +49.72029 +52.400383 +57.968666 +61.28138 +58.4683 +53.114418 +50.45093 +67.63649 +60.31344 +66.52325 +72.48887 +58.755577 +53.332912 +55.175415 +53.437675 +50.452156 +61.153603 +66.69711 +51.1279 +51.37375 +50.732525 +67.677734 +74.2334 +51.287792 +70.154366 +68.460396 +58.35005 +59.828957 +74.98557 +73.3624 +54.043793 +73.04924 +77.22285 +59.452316 +56.143288 +74.41183 +60.254143 +67.18662 +63.53044 +60.43683 +60.07025 +57.257767 +52.143753 +52.872334 +67.748436 +63.986977 +55.532387 +59.70022 +49.43772 +65.30266 +67.30055 +49.907486 +57.864845 +56.207542 +70.46542 +55.503044 +73.822784 +63.741142 +49.693428 +71.36254 +71.87617 +72.02608 +65.63652 +54.059746 +51.300495 +76.06125 +73.98534 +63.071587 +75.93381 +69.479454 +63.85415 +51.218174 +49.468956 +68.23912 +50.83457 +70.77809 +72.129776 +74.53812 +68.9107 +72.47451 +50.62992 +62.99655 +56.105698 +72.927025 +65.86492 +58.282486 +75.063446 +54.558403 +65.59456 +57.257263 +58.336494 +51.988983 +57.355415 +56.631332 +62.632957 +76.11209 +76.99285 +65.670746 +74.464355 +68.042145 +54.761986 +51.070145 +56.55138 +55.53712 +57.753426 +75.02803 +57.397556 +71.05187 +51.134808 +53.119152 +52.581924 +70.8574 +66.85955 +67.29634 +66.589584 +76.06389 +54.559666 +60.37111 +63.455887 +72.6416 +75.51883 +63.990837 +53.491386 +59.82952 +60.56826 +76.53373 +66.729385 +52.592728 +48.729107 +68.03414 +56.391117 +50.800247 +50.053703 +52.03207 +55.326523 +52.58854 +60.38707 +75.923096 +75.2882 +54.893684 +78.00183 +76.06732 +60.791916 +70.38205 +60.582397 +53.582005 +77.20325 +54.903778 +68.63178 +70.27207 +54.5502 +53.928703 +74.93919 +52.267735 +51.70433 +59.89312 +74.00166 +66.61868 +70.04806 +55.62455 +65.638214 +55.330837 +65.8484 +65.45604 +50.942883 +56.04741 +52.147808 +69.9472 +52.90547 +51.568893 +57.65322 +76.28175 +53.421043 +73.63155 +77.357666 +77.49912 +51.669907 +67.80663 +49.745773 +52.792336 +62.308838 +76.21391 +70.10635 +53.58763 +76.36336 +75.63791 +66.51898 +59.851395 +53.114918 +50.095005 +54.76951 +58.387985 +76.39301 +53.754196 +66.004395 +59.4105 +53.724583 +63.857407 +70.29119 +50.46862 +58.864563 +61.946457 +70.4472 +50.738815 +65.65154 +52.600437 +49.42977 +70.38036 +56.012196 +53.824024 +71.119225 +75.3495 +49.078987 +74.36192 +71.18959 +54.9702 +54.477818 +72.231705 +68.62958 +52.298077 +52.34682 +70.110405 +60.08683 +74.98835 +55.85307 +66.53965 +53.608902 +67.770744 +66.93648 +68.07121 +59.94021 +58.784706 +50.237366 +77.0887 +65.06997 +50.1484 +51.08928 +74.907234 +56.82161 +62.303955 +62.67704 +61.49601 diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline01 Binary file test-data/pipeline01 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline02 Binary file 
test-data/pipeline02 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline03 Binary file test-data/pipeline03 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline04 Binary file test-data/pipeline04 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline05 Binary file test-data/pipeline05 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline06 Binary file test-data/pipeline06 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline07 Binary file test-data/pipeline07 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline08 Binary file test-data/pipeline08 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline09 Binary file test-data/pipeline09 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline10 Binary file test-data/pipeline10 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline11 Binary file test-data/pipeline11 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline12 Binary file test-data/pipeline12 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline13 Binary file test-data/pipeline13 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline14 Binary file test-data/pipeline14 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline15 Binary file test-data/pipeline15 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/pipeline16 Binary file test-data/pipeline16 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/prp_model01 Binary file test-data/prp_model01 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/prp_model02 Binary file test-data/prp_model02 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/prp_model04 Binary file test-data/prp_model04 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/prp_model05 Binary file test-data/prp_model05 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/prp_model07 Binary file test-data/prp_model07 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/prp_model08 Binary file test-data/prp_model08 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/prp_model09 Binary file test-data/prp_model09 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/prp_result10 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/prp_result10 Fri Aug 09 07:12:16 2019 -0400 @@ -0,0 +1,262 @@ +year month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Fri week_Mon week_Sat week_Sun week_Thurs week_Tues week_Wed +-1.0 0.4545454545454546 0.19999999999999996 0.22222222222222188 -0.17073170731707288 0.5232198142414863 0.33333333333333304 0.6000000000000001 0.5428571428571427 0.791044776119403 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 -0.1333333333333333 -0.07407407407407396 -0.41463414634146334 -0.195046439628483 -0.11111111111111116 -0.02857142857142847 -0.20000000000000018 0.13432835820895517 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.09090909090909083 0.9333333333333333 0.8518518518518516 0.29268292682926855 0.9938080495356032 0.8888888888888884 0.8857142857142857 0.8857142857142852 0.25373134328358193 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 -0.06666666666666665 0.7407407407407405 -0.26829268292682906 0.21362229102167207 0.22222222222222232 0.31428571428571406 0.1428571428571428 -0.10447761194029859 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 0.1333333333333333 -0.2962962962962963 -0.6341463414634145 -0.8513931888544892 -0.8333333333333335 -0.8857142857142857 
-0.7142857142857144 -0.10447761194029859 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 0.6000000000000001 -0.5185185185185186 -0.6097560975609755 -0.8080495356037152 -0.7777777777777777 -0.7142857142857144 -0.7142857142857144 0.04477611940298498 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 0.6000000000000001 -0.4814814814814816 -0.5853658536585364 -0.7832817337461302 -0.7777777777777777 -0.657142857142857 -0.8285714285714287 -0.6119402985074627 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 0.2666666666666666 0.40740740740740744 0.048780487804878314 0.956656346749226 0.8888888888888884 0.8285714285714287 0.8857142857142852 0.13432835820895517 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.9999999999999998 0.06666666666666665 -0.8518518518518519 -0.9999999999999999 -0.9938080495356036 -0.8888888888888888 -0.9428571428571431 -0.8857142857142857 -0.7014925373134329 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 -0.5333333333333333 -0.7407407407407409 -0.8780487804878048 -0.9380804953560373 -0.7777777777777777 -0.7142857142857144 -0.8285714285714287 -0.7611940298507462 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.9999999999999998 0.8 -0.7407407407407409 -0.7073170731707314 -0.9876160990712077 -1.0 -0.8285714285714287 -1.0 -0.10447761194029859 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.09090909090909083 0.06666666666666665 0.5185185185185186 -0.09756097560975596 0.9318885448916405 0.9444444444444446 0.8285714285714287 0.8857142857142852 0.791044776119403 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 -0.6 0.2592592592592591 2.220446049250313e-16 0.8142414860681115 0.7777777777777777 0.7714285714285709 0.7142857142857144 0.31343283582089554 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.9999999999999998 -0.06666666666666665 -0.8148148148148149 -0.9024390243902437 -0.9876160990712077 -0.7777777777777777 -0.8285714285714287 -0.8285714285714287 -0.4626865671641791 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 0.7333333333333334 0.33333333333333304 0.048780487804878314 0.6780185758513935 0.6111111111111112 0.6000000000000001 0.5999999999999996 0.6716417910447763 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.9999999999999998 0.07407407407407396 -0.12195121951219501 0.3746130030959747 0.22222222222222232 0.4857142857142853 0.37142857142857144 0.7014925373134326 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -1.0 0.2666666666666666 -0.2962962962962963 -0.6829268292682924 -0.8390092879256965 -0.8333333333333335 -0.657142857142857 -0.7142857142857144 -0.014925373134328401 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.2727272727272727 -0.4 0.40740740740740744 -0.09756097560975596 0.9752321981424141 1.0 0.8285714285714287 0.8857142857142852 0.19402985074626877 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.6363636363636365 0.46666666666666656 -0.22222222222222232 -0.4634146341463412 -0.40557275541795645 -0.5 -0.3142857142857145 -0.37142857142857144 0.25373134328358193 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.9999999999999998 0.5333333333333334 -0.6296296296296298 -0.8780487804878048 -1.0 -0.8333333333333335 -0.9428571428571431 -0.8857142857142857 -0.6716417910447761 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 0.19999999999999996 -0.44444444444444464 -0.5365853658536583 -0.8452012383900929 -0.6666666666666665 -0.8285714285714287 -0.7714285714285714 -0.25373134328358216 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.8181818181818181 -0.6666666666666666 0.11111111111111116 -0.43902439024390216 -0.4984520123839009 -0.38888888888888884 -0.37142857142857144 
-0.37142857142857144 0.28358208955223874 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 0.06666666666666665 -0.07407407407407396 -0.19512195121951192 -0.16408668730650167 -0.0555555555555558 -0.08571428571428585 -0.1428571428571428 -0.22388059701492535 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.8666666666666667 -0.07407407407407396 -0.26829268292682906 -0.3684210526315792 -0.22222222222222232 -0.2571428571428571 -0.37142857142857144 0.10447761194029859 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 -1.0 -0.5185185185185186 -0.7073170731707314 -0.7708978328173379 -0.7222222222222223 -0.8285714285714287 -0.7142857142857144 -0.31343283582089554 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 -0.2666666666666666 -0.6666666666666667 -0.7804878048780486 -0.9690402476780187 -0.8888888888888888 -0.7714285714285714 -0.9428571428571431 -0.5820895522388059 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.9333333333333333 0.07407407407407396 -0.2926829268292681 0.36222910216718196 0.2777777777777777 0.37142857142857144 0.2571428571428571 0.22388059701492558 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.46666666666666656 -0.11111111111111116 -0.3414634146341462 -0.2569659442724461 -0.11111111111111116 -0.3142857142857145 -0.1428571428571428 0.16417910447761197 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 0.9333333333333333 0.22222222222222188 -0.24390243902439002 0.27554179566563475 0.2777777777777777 0.19999999999999973 0.19999999999999973 0.37313432835820914 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 -0.2666666666666666 0.5555555555555554 -0.14634146341463405 0.6532507739938072 0.4444444444444442 0.5428571428571431 0.6571428571428575 0.8507462686567162 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 -0.9333333333333333 -0.11111111111111116 -0.4634146341463412 -0.43653250773993824 -0.2777777777777777 -0.3142857142857145 -0.37142857142857144 0.25373134328358193 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.8181818181818181 0.06666666666666665 -0.2592592592592595 -0.6341463414634145 -0.6656346749226008 -0.7222222222222223 -0.7142857142857144 -0.657142857142857 -0.13432835820895517 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 -0.8666666666666667 -0.14814814814814836 -0.5121951219512193 -0.5851393188854495 -0.5555555555555558 -0.5428571428571431 -0.657142857142857 0.28358208955223874 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.8181818181818181 0.33333333333333326 -0.18518518518518512 -0.5121951219512193 -0.7275541795665634 -0.7222222222222223 -0.7142857142857144 -0.7142857142857144 0.16417910447761197 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 0.7333333333333334 -0.7407407407407409 -0.8292682926829267 -0.9938080495356036 -1.0 -0.7714285714285714 -0.8285714285714287 -0.4328358208955223 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.4545454545454546 0.5333333333333334 0.07407407407407396 -0.26829268292682906 -0.07120743034055721 -0.11111111111111116 -0.1428571428571428 -0.08571428571428585 -0.6119402985074627 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.2666666666666666 0.07407407407407396 -0.31707317073170715 0.2693498452012375 0.22222222222222232 0.37142857142857144 0.1428571428571428 0.34328358208955234 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 0.0 -0.4814814814814816 -0.6829268292682924 -0.8637770897832819 -0.7777777777777777 -0.657142857142857 -0.8857142857142857 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 -0.6 
-0.8148148148148149 -0.8292682926829267 -0.9256965944272451 -0.8333333333333335 -0.7142857142857144 -0.8857142857142857 0.014925373134328401 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -1.0 -0.6 -0.6666666666666667 -0.6097560975609755 -0.931888544891641 -0.7777777777777777 -0.8285714285714287 -0.8857142857142857 -0.7014925373134329 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.4545454545454546 0.5333333333333334 0.18518518518518512 -0.2926829268292681 0.41795665634674917 0.33333333333333304 0.4285714285714284 0.2571428571428571 0.07462686567164178 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.2727272727272727 0.9333333333333333 0.6296296296296293 -0.024390243902438824 0.8266253869969034 0.833333333333333 0.714285714285714 0.7714285714285714 0.04477611940298498 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -1.0 -0.33333333333333337 -0.44444444444444464 -0.5853658536585364 -0.9009287925696592 -0.9444444444444446 -0.8857142857142857 -0.7714285714285714 -0.6716417910447761 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 -0.4666666666666667 0.8518518518518516 -0.21951219512195097 0.4551083591331264 0.38888888888888884 0.5428571428571431 0.4285714285714284 0.5522388059701491 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.4545454545454546 0.3999999999999999 0.18518518518518512 -0.19512195121951192 0.4613003095975228 0.33333333333333304 0.37142857142857144 0.4285714285714284 -0.16417910447761197 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 0.6000000000000001 -0.3333333333333335 -0.5365853658536583 -0.38699690402476783 -0.3333333333333335 -0.37142857142857144 -0.2571428571428571 -0.5820895522388059 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.5333333333333334 0.0 -0.3414634146341462 -0.2755417956656352 -0.38888888888888884 -0.1428571428571428 -0.2571428571428571 0.25373134328358193 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 0.0 0.5555555555555554 2.220446049250313e-16 0.9195046439628478 0.9444444444444446 0.8285714285714287 0.7714285714285714 -0.014925373134328401 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 -1.0 0.4444444444444442 -0.07317073170731692 0.7337461300309589 0.6666666666666665 0.657142857142857 0.5999999999999996 0.9402985074626866 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 0.1333333333333333 -0.44444444444444464 -0.5853658536585364 -0.6780185758513935 -0.5 -0.6000000000000001 -0.657142857142857 -0.791044776119403 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 -0.8666666666666667 0.4814814814814814 -0.14634146341463405 0.7832817337461297 0.6666666666666665 0.657142857142857 0.6571428571428575 0.19402985074626877 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.2727272727272727 -0.9333333333333333 0.40740740740740744 0.024390243902439268 1.0000000000000004 0.8888888888888884 0.9428571428571426 1.0 0.014925373134328401 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.4545454545454546 -0.7333333333333334 0.2592592592592591 -0.3902439024390243 -0.28792569659442746 -0.38888888888888884 -0.3142857142857145 -0.3142857142857145 0.31343283582089554 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.6363636363636365 -0.19999999999999996 -0.2592592592592595 -0.5853658536585364 -0.492260061919505 -0.5 -0.48571428571428577 -0.48571428571428577 -0.22388059701492535 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.2727272727272727 0.8 0.7037037037037037 0.07317073170731736 0.8513931888544888 0.6666666666666665 0.7714285714285709 0.8285714285714283 0.7014925373134326 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 -0.4666666666666667 
0.5555555555555554 2.220446049250313e-16 -0.25077399380804977 -0.3333333333333335 -0.1428571428571428 -0.2571428571428571 0.37313432835820914 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.6666666666666667 0.14814814814814792 -0.24390243902439002 0.3250773993808047 0.2777777777777777 0.37142857142857144 0.19999999999999973 0.7014925373134326 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.6363636363636365 -0.4 0.22222222222222188 -0.4634146341463412 0.03405572755417907 -0.0555555555555558 0.02857142857142847 -0.02857142857142847 0.014925373134328401 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 -0.4 0.5185185185185186 -0.24390243902439002 -0.23839009287925705 -0.11111111111111116 -0.20000000000000018 -0.2571428571428571 -0.04477611940298498 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.19999999999999996 -0.07407407407407396 -0.36585365853658525 -0.17647058823529438 -0.0555555555555558 -0.20000000000000018 -0.2571428571428571 -0.6119402985074627 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.6363636363636365 -0.2666666666666666 -0.22222222222222232 -0.5121951219512193 -0.5046439628482973 -0.38888888888888884 -0.3142857142857145 -0.48571428571428577 0.10447761194029859 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 0.5333333333333334 -0.18518518518518512 -0.6829268292682924 -0.8142414860681115 -0.7222222222222223 -0.7714285714285714 -0.7714285714285714 -0.22388059701492535 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 -0.6 -0.3333333333333335 -0.6585365853658536 -0.7461300309597525 -0.7222222222222223 -0.7142857142857144 -0.7714285714285714 0.04477611940298498 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.7333333333333334 0.14814814814814792 -0.26829268292682906 0.3374613003095974 0.2777777777777777 0.19999999999999973 0.37142857142857144 0.34328358208955234 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 -0.7333333333333334 0.4444444444444442 -0.3902439024390243 0.07739938080495312 -0.0555555555555558 0.1428571428571428 0.02857142857142847 -0.16417910447761197 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 -0.33333333333333337 -0.2592592592592595 -0.48780487804878025 -0.5108359133126936 -0.3333333333333335 -0.6000000000000001 -0.6000000000000001 -0.7611940298507462 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.3999999999999999 0.0 -0.41463414634146334 -0.23839009287925705 -0.16666666666666696 -0.2571428571428571 -0.20000000000000018 -0.5223880597014925 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 -0.33333333333333337 -0.962962962962963 -0.7804878048780486 -0.9628482972136223 -1.0 -1.0 -0.8285714285714287 -0.791044776119403 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 -0.5333333333333333 0.5555555555555554 0.14634146341463428 0.12074303405572762 0.16666666666666652 0.08571428571428541 0.08571428571428585 0.6417910447761195 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.8666666666666667 0.07407407407407396 -0.2926829268292681 0.3560371517027865 0.33333333333333304 0.4285714285714284 0.19999999999999973 0.4328358208955223 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 -0.19999999999999996 -0.6666666666666667 -0.8048780487804876 -0.975232198142415 -1.0 -0.9428571428571431 -0.8857142857142857 -0.4626865671641791 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.6363636363636365 0.9333333333333333 -0.22222222222222232 -0.2926829268292681 -0.3436532507739938 -0.44444444444444464 -0.37142857142857144 -0.3142857142857145 -0.13432835820895517 -1.0 -1.0 
-1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.8181818181818181 -0.5333333333333333 -0.0370370370370372 -0.31707317073170715 -0.5294117647058822 -0.5555555555555558 -0.37142857142857144 -0.5428571428571427 -0.3731343283582089 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.09090909090909105 0.2666666666666666 0.11111111111111116 -0.14634146341463405 0.5789473684210518 0.4444444444444442 0.4285714285714284 0.48571428571428577 0.5223880597014927 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 -0.4666666666666667 0.03703703703703676 -0.12195121951219501 -0.5479876160990713 -0.6111111111111112 -0.4285714285714288 -0.5428571428571427 -0.5820895522388059 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.09090909090909083 -0.8666666666666667 0.5185185185185186 2.220446049250313e-16 0.7585139318885443 0.5555555555555554 0.714285714285714 0.7714285714285714 0.7014925373134326 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 -0.4666666666666667 0.07407407407407396 -0.19512195121951192 0.05263157894736814 -0.0555555555555558 0.08571428571428541 0.08571428571428585 -0.19402985074626855 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 0.0 -0.8518518518518519 -0.9024390243902437 -0.9876160990712077 -0.8333333333333335 -0.8285714285714287 -1.0 -0.6716417910447761 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 0.0 0.6296296296296293 -0.12195121951219501 0.585139318885449 0.6111111111111112 0.6000000000000001 0.5428571428571427 -0.28358208955223874 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 0.6000000000000001 0.22222222222222188 -0.17073170731707288 0.6470588235294117 0.5 0.5428571428571431 0.6571428571428575 0.8208955223880599 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 -0.19999999999999996 0.2962962962962963 -0.04878048780487787 0.634674922600619 0.6666666666666665 0.657142857142857 0.48571428571428577 0.6119402985074627 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.2727272727272727 -0.2666666666666666 0.4814814814814814 0.12195121951219523 0.1764705882352935 0.16666666666666652 0.19999999999999973 0.08571428571428585 0.5820895522388059 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 -0.5333333333333333 -0.4814814814814816 -0.6097560975609755 -0.7399380804953566 -0.5555555555555558 -0.657142857142857 -0.657142857142857 -0.8208955223880596 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 -0.2666666666666666 -0.37037037037037046 -0.7560975609756095 -0.8947368421052633 -0.8333333333333335 -0.7714285714285714 -0.9428571428571431 -0.014925373134328401 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.2727272727272727 -0.19999999999999996 0.6666666666666665 0.2682926829268295 0.9628482972136214 0.7777777777777777 0.8857142857142857 0.9428571428571431 0.34328358208955234 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 -0.8 0.5185185185185186 -0.12195121951219501 0.7770897832817334 0.6666666666666665 0.714285714285714 0.6571428571428575 0.7313432835820894 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 0.6000000000000001 0.11111111111111116 -0.5121951219512193 -0.05882352941176494 -0.16666666666666696 0.02857142857142847 -0.02857142857142847 0.4626865671641791 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.2727272727272727 -0.2666666666666666 0.5185185185185186 0.09756097560975618 0.9690402476780187 0.7222222222222223 0.8857142857142857 0.8857142857142852 0.5820895522388059 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 0.33333333333333326 0.33333333333333304 -0.21951219512195097 0.47987616099071184 0.33333333333333304 
0.37142857142857144 0.48571428571428577 0.4328358208955223 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.4545454545454546 0.9333333333333333 0.07407407407407396 -0.36585365853658525 0.00928792569659409 0.05555555555555536 0.08571428571428541 0.02857142857142847 0.4925373134328359 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 -0.7333333333333334 -0.4814814814814816 -0.7317073170731705 -0.907120743034056 -0.8888888888888888 -0.7714285714285714 -0.9428571428571431 0.10447761194029859 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 0.19999999999999996 -1.0 -0.9024390243902437 -1.0 -0.9444444444444446 -1.0 -0.9428571428571431 -0.31343283582089554 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 0.46666666666666656 0.22222222222222188 -0.21951219512195097 0.43653250773993735 0.4444444444444442 0.31428571428571406 0.31428571428571406 -0.014925373134328401 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 0.8666666666666667 -0.5185185185185186 -0.5853658536585364 -0.8328173374613006 -0.8888888888888888 -0.8857142857142857 -0.8285714285714287 -0.34328358208955234 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.09090909090909105 0.0 -0.07407407407407396 -0.21951219512195097 0.5294117647058818 0.5 0.4857142857142853 0.5428571428571427 0.7611940298507462 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.4545454545454546 -0.1333333333333333 0.4444444444444442 -0.024390243902438824 0.6160990712074299 0.4444444444444442 0.657142857142857 0.6571428571428575 0.4626865671641791 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.4545454545454546 -0.6666666666666666 0.22222222222222188 -0.19512195121951192 0.7461300309597516 0.7777777777777777 0.714285714285714 0.7714285714285714 0.5223880597014927 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.09090909090909105 -0.6666666666666666 0.7037037037037037 0.39024390243902474 0.4303405572755419 0.33333333333333304 0.37142857142857144 0.31428571428571406 0.28358208955223874 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 -0.5333333333333333 0.22222222222222188 -0.21951219512195097 0.7151702786377707 0.5555555555555554 0.7714285714285709 0.6571428571428575 -0.16417910447761197 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -1.0 -0.8666666666666667 -0.6296296296296298 -0.7804878048780486 -0.9566563467492264 -0.8888888888888888 -1.0 -0.8285714285714287 -0.16417910447761197 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 0.8 -0.07407407407407396 -0.36585365853658525 -0.01547987616099089 -0.16666666666666696 0.08571428571428541 0.02857142857142847 0.34328358208955234 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.8181818181818181 -0.7333333333333334 0.11111111111111116 -0.26829268292682906 -0.48606811145510864 -0.5555555555555558 -0.3142857142857145 -0.5428571428571427 -0.6119402985074627 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 -0.6 0.22222222222222188 -0.19512195121951192 0.7275541795665634 0.7222222222222223 0.8285714285714287 0.5428571428571427 0.25373134328358193 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.2727272727272727 -0.8666666666666667 0.5555555555555554 0.2682926829268295 0.05263157894736814 0.16666666666666652 0.1428571428571428 0.1428571428571428 0.22388059701492558 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.6363636363636365 0.9999999999999998 0.11111111111111116 1.0000000000000004 -0.39938080495356054 -0.44444444444444464 -0.2571428571428571 -0.3142857142857145 0.014925373134328401 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 0.1333333333333333 0.37037037037037024 0.09756097560975618 
0.9380804953560369 0.8888888888888884 0.7714285714285709 0.7714285714285714 0.13432835820895517 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 -0.06666666666666665 -0.2592592592592595 -0.4634146341463412 -0.6346749226006194 -0.6666666666666665 -0.5428571428571431 -0.6000000000000001 -0.4626865671641791 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.2727272727272727 -0.4 0.03703703703703676 -0.21951219512195097 0.1455108359133126 0.11111111111111116 0.1428571428571428 0.1428571428571428 0.19402985074626877 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.6363636363636365 0.1333333333333333 -0.3333333333333335 -0.43902439024390216 -0.44891640866873095 -0.44444444444444464 -0.37142857142857144 -0.4285714285714288 -0.16417910447761197 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.6666666666666667 -0.0370370370370372 -0.26829268292682906 -0.31269349845201244 -0.3333333333333335 -0.37142857142857144 -0.2571428571428571 -0.6119402985074627 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -1.0 0.9333333333333333 -0.22222222222222232 -0.5853658536585364 -0.7832817337461302 -0.7777777777777777 -0.7142857142857144 -0.7714285714285714 -0.4328358208955223 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 0.7333333333333334 -0.18518518518518512 -0.41463414634146334 -0.3684210526315792 -0.38888888888888884 -0.3142857142857145 -0.37142857142857144 -0.6716417910447761 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 -0.8666666666666667 -0.18518518518518512 -0.4634146341463412 -0.4551083591331273 -0.3333333333333335 -0.5428571428571431 -0.4285714285714288 -0.791044776119403 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 0.2666666666666666 1.0 0.12195121951219523 -0.12693498452012397 -0.16666666666666696 -0.02857142857142847 -0.02857142857142847 0.13432835820895517 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.09090909090909083 0.5333333333333334 0.33333333333333304 -0.024390243902438824 0.9814241486068105 0.9444444444444446 0.8285714285714287 0.9428571428571431 0.4029850746268655 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 0.9999999999999998 0.9629629629629628 2.220446049250313e-16 1.0000000000000004 0.9444444444444446 0.8285714285714287 1.0 0.9999999999999998 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.0 0.11111111111111116 -0.4634146341463412 0.21981424148606754 0.11111111111111116 0.08571428571428541 0.19999999999999973 -0.25373134328358216 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 -0.6666666666666666 0.22222222222222188 -0.17073170731707288 0.8018575851393188 0.7222222222222223 0.714285714285714 0.7714285714285714 0.7313432835820894 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.4545454545454546 0.7333333333333334 0.5185185185185186 0.024390243902439268 0.3436532507739938 0.38888888888888884 0.19999999999999973 0.37142857142857144 0.07462686567164178 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.8181818181818182 0.0 -0.14814814814814836 -0.5121951219512193 -0.7027863777089784 -0.6666666666666665 -0.5428571428571431 -0.6000000000000001 -0.19402985074626855 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.9999999999999998 -0.8 -0.44444444444444464 -0.6585365853658536 -0.8947368421052633 -0.7777777777777777 -0.9428571428571431 -0.8285714285714287 -0.25373134328358216 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 -0.4666666666666667 -0.3333333333333335 -0.5365853658536583 -0.5294117647058822 -0.6111111111111112 -0.4285714285714288 -0.4285714285714288 -0.13432835820895517 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 
0.8181818181818181 -0.1333333333333333 -0.11111111111111116 -0.5121951219512193 -0.6222910216718267 -0.5555555555555558 -0.6000000000000001 -0.48571428571428577 -0.5820895522388059 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 0.8666666666666667 -0.40740740740740744 -0.48780487804878025 -0.34984520123839014 -0.3333333333333335 -0.2571428571428571 -0.4285714285714288 -0.4925373134328359 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.09090909090909083 -0.5333333333333333 0.5185185185185186 -0.19512195121951192 0.8266253869969034 0.7222222222222223 0.8857142857142857 0.7714285714285714 0.4626865671641791 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 -0.1333333333333333 -0.37037037037037046 -0.5365853658536583 -0.48606811145510864 -0.5555555555555558 -0.3142857142857145 -0.37142857142857144 -0.5223880597014925 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 -0.33333333333333337 0.11111111111111116 -0.21951219512195097 0.47987616099071184 0.5555555555555554 0.4857142857142853 0.5428571428571427 0.7611940298507462 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 -0.19999999999999996 -0.6296296296296298 -0.6585365853658536 -0.8885448916408669 -0.7777777777777777 -0.7142857142857144 -0.8857142857142857 -0.8507462686567164 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.8181818181818182 -0.7333333333333334 -0.4814814814814816 -0.6585365853658536 -0.7523219814241489 -0.6666666666666665 -0.7714285714285714 -0.7142857142857144 -0.4925373134328359 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 0.8666666666666667 -0.18518518518518512 -0.48780487804878025 -0.7894736842105265 -0.6111111111111112 -0.657142857142857 -0.8285714285714287 -0.3731343283582089 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 0.3999999999999999 0.5185185185185186 -0.07317073170731692 0.6037151702786372 0.38888888888888884 0.4285714285714284 0.5999999999999996 0.4925373134328359 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.2727272727272727 0.6000000000000001 0.11111111111111116 -0.24390243902439002 0.31888544891640835 0.33333333333333304 0.19999999999999973 0.2571428571428571 -0.04477611940298498 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.4545454545454546 0.8 0.5555555555555554 -0.17073170731707288 0.3250773993808047 0.38888888888888884 0.2571428571428571 0.2571428571428571 0.014925373134328401 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.2727272727272727 -0.1333333333333333 0.5555555555555554 0.14634146341463428 0.20123839009287892 0.2777777777777777 0.1428571428571428 0.2571428571428571 0.10447761194029859 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.2727272727272727 -0.1333333333333333 0.9259259259259256 0.34146341463414664 0.956656346749226 0.8888888888888884 0.8285714285714287 0.9428571428571431 0.10447761194029859 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 0.46666666666666656 -0.40740740740740744 -0.6097560975609755 -0.653250773993808 -0.5555555555555558 -0.6000000000000001 -0.6000000000000001 -0.5522388059701493 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.4545454545454546 -0.5333333333333333 0.22222222222222188 0.024390243902439268 -0.2569659442724461 -0.11111111111111116 -0.1428571428571428 -0.2571428571428571 -0.6119402985074627 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 -0.33333333333333337 -0.18518518518518512 -0.3902439024390243 0.00928792569659409 -0.0555555555555558 0.1428571428571428 -0.02857142857142847 -0.10447761194029859 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.09090909090909105 0.9333333333333333 0.6296296296296293 -0.04878048780487787 
0.7151702786377707 0.6666666666666665 0.714285714285714 0.5999999999999996 0.7611940298507462 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.09090909090909083 0.6666666666666667 0.6666666666666665 0.21951219512195141 0.9876160990712068 0.7777777777777777 0.8857142857142857 0.8285714285714283 0.37313432835820914 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.2727272727272727 -0.6666666666666666 -0.07407407407407396 -0.19512195121951192 0.09597523219814219 0.11111111111111116 0.02857142857142847 -0.02857142857142847 0.07462686567164178 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 -0.33333333333333337 0.0 -0.48780487804878025 -0.7275541795665634 -0.7222222222222223 -0.6000000000000001 -0.657142857142857 -0.7313432835820894 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 -0.9333333333333333 0.40740740740740744 -0.12195121951219501 -0.31269349845201244 -0.22222222222222232 -0.3142857142857145 -0.20000000000000018 -0.4925373134328359 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.0 -0.07407407407407396 -0.3414634146341462 -0.10835913312693535 -0.11111111111111116 -0.20000000000000018 -0.1428571428571428 -0.6417910447761195 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 0.8 0.6296296296296293 0.17073170731707332 0.9938080495356032 0.9444444444444446 0.9428571428571426 0.9428571428571431 0.4328358208955223 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.19999999999999996 0.33333333333333304 -0.2926829268292681 0.2569659442724457 0.16666666666666652 0.2571428571428571 0.31428571428571406 -0.16417910447761197 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -1.0 0.7333333333333334 -0.2962962962962963 -0.48780487804878025 -0.7956656346749229 -0.7777777777777777 -0.7142857142857144 -0.7142857142857144 -0.22388059701492535 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.9999999999999998 0.6000000000000001 -0.8148148148148149 -0.8536585365853657 -1.0 -0.9444444444444446 -0.8285714285714287 -1.0 -0.9104477611940298 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.5333333333333334 0.14814814814814792 -0.26829268292682906 0.30650154798761564 0.38888888888888884 0.4285714285714284 0.2571428571428571 0.16417910447761197 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.8181818181818181 -0.8 -0.18518518518518512 -0.26829268292682906 -0.4674922600619196 -0.5555555555555558 -0.48571428571428577 -0.4285714285714288 -0.7014925373134329 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 -0.7333333333333334 -0.7777777777777779 -0.8780487804878048 -0.9442724458204337 -0.7222222222222223 -1.0 -0.8857142857142857 -0.6119402985074627 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -1.0 -1.0 -0.6296296296296298 -0.7560975609756095 -0.9690402476780187 -0.8888888888888888 -0.7714285714285714 -1.0 -0.9701492537313433 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 0.6666666666666667 -0.37037037037037046 -0.5853658536585364 -0.7956656346749229 -0.6111111111111112 -0.7714285714285714 -0.8285714285714287 -0.10447761194029859 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 -0.2666666666666666 0.07407407407407396 -0.31707317073170715 -0.5913312693498454 -0.5 -0.657142857142857 -0.5428571428571427 0.04477611940298498 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 0.9333333333333333 -0.37037037037037046 -0.5853658536585364 -0.8452012383900929 -0.6666666666666665 -0.657142857142857 -0.7142857142857144 -0.5223880597014925 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.4545454545454546 -0.19999999999999996 -0.14814814814814836 -0.3902439024390243 -0.20743034055727572 
-0.22222222222222232 -0.08571428571428585 -0.3142857142857145 0.4626865671641791 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.2727272727272727 0.46666666666666656 0.8148148148148144 0.12195121951219523 0.8947368421052633 0.7777777777777777 0.8285714285714287 0.8857142857142852 0.8208955223880599 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.09090909090909083 -0.1333333333333333 0.5555555555555554 -0.024390243902438824 0.9009287925696587 0.833333333333333 0.714285714285714 0.8857142857142852 0.4626865671641791 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.8181818181818181 -0.19999999999999996 0.03703703703703676 -0.41463414634146334 -0.6099071207430344 -0.6111111111111112 -0.4285714285714288 -0.657142857142857 0.07462686567164178 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.2727272727272727 -0.4666666666666667 0.37037037037037024 -0.07317073170731692 0.9814241486068105 1.0 0.9428571428571426 1.0 0.9701492537313434 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.2727272727272727 -0.8 0.40740740740740744 -0.024390243902438824 0.9938080495356032 0.7777777777777777 0.8857142857142857 0.9428571428571431 0.13432835820895517 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 0.0 -0.11111111111111116 -0.3902439024390243 -0.17027863777089802 -0.16666666666666696 -0.20000000000000018 -0.1428571428571428 -0.07462686567164178 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 0.46666666666666656 0.40740740740740744 -0.024390243902438824 0.6222910216718263 0.5 0.4857142857142853 0.5428571428571427 -0.16417910447761197 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 -0.33333333333333337 0.14814814814814792 -0.41463414634146334 -0.22600619195046434 -0.16666666666666696 -0.20000000000000018 -0.20000000000000018 -0.6417910447761195 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 -0.6666666666666666 -0.4814814814814816 -0.5609756097560974 -0.7523219814241489 -0.6666666666666665 -0.6000000000000001 -0.7142857142857144 -0.16417910447761197 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.2727272727272727 -0.6666666666666666 0.6666666666666665 0.07317073170731736 0.9876160990712068 0.9444444444444446 0.9999999999999996 1.0 -0.04477611940298498 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 -0.7333333333333334 -0.11111111111111116 -0.4634146341463412 -0.5665634674922599 -0.5555555555555558 -0.6000000000000001 -0.6000000000000001 -0.4626865671641791 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 -0.9333333333333333 0.6296296296296293 -0.024390243902438824 0.39318885448916374 0.2777777777777777 0.4285714285714284 0.31428571428571406 0.4626865671641791 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.4545454545454546 0.2666666666666666 0.2592592592592591 -0.12195121951219501 0.5046439628482973 0.4444444444444442 0.5428571428571431 0.4285714285714284 0.5820895522388059 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.8181818181818182 0.19999999999999996 -0.18518518518518512 -0.5609756097560974 -0.6842105263157894 -0.5 -0.657142857142857 -0.6000000000000001 -0.5820895522388059 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 -0.9333333333333333 -0.5555555555555556 -0.7317073170731705 -0.7708978328173379 -0.6111111111111112 -0.7714285714285714 -0.657142857142857 -0.16417910447761197 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.09090909090909083 0.3999999999999999 0.7407407407407405 0.12195121951219523 0.9690402476780187 0.7222222222222223 0.7714285714285709 0.8285714285714283 0.25373134328358193 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 0.5333333333333334 
-0.2962962962962963 -0.6585365853658536 -0.7647058823529416 -0.6666666666666665 -0.6000000000000001 -0.7714285714285714 -0.9701492537313433 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -1.0 0.8 -0.22222222222222232 -0.4634146341463412 -0.7956656346749229 -0.8333333333333335 -0.657142857142857 -0.7714285714285714 -0.8208955223880596 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.1333333333333333 -0.07407407407407396 -0.3902439024390243 -0.1517027863777094 -0.2777777777777777 -0.20000000000000018 -0.2571428571428571 -0.25373134328358216 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.4545454545454546 -0.8 0.2962962962962963 -0.21951219512195097 0.770897832817337 0.7222222222222223 0.7714285714285709 0.7714285714285714 0.07462686567164178 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 -0.8 0.11111111111111116 -0.36585365853658525 0.1764705882352935 0.16666666666666652 0.31428571428571406 0.19999999999999973 -0.04477611940298498 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.09090909090909105 -0.1333333333333333 0.2962962962962963 -0.24390243902439002 0.5108359133126936 0.38888888888888884 0.4285714285714284 0.4285714285714284 0.7014925373134326 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.8181818181818181 -0.33333333333333337 0.11111111111111116 -0.2926829268292681 -0.5789473684210531 -0.5 -0.6000000000000001 -0.5428571428571427 -0.19402985074626855 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.33333333333333326 0.03703703703703676 -0.24390243902439002 0.27554179566563475 0.16666666666666652 0.19999999999999973 0.19999999999999973 -0.3731343283582089 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 -0.6666666666666666 -0.18518518518518512 -0.2926829268292681 -0.5603715170278636 -0.38888888888888884 -0.6000000000000001 -0.6000000000000001 -0.3731343283582089 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.1333333333333333 -0.07407407407407396 -0.12195121951219501 0.24458204334365297 0.11111111111111116 0.2571428571428571 0.19999999999999973 -0.16417910447761197 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.2727272727272727 -0.33333333333333337 0.18518518518518512 -0.024390243902438824 0.15789473684210487 0.16666666666666652 0.2571428571428571 0.08571428571428585 -0.04477611940298498 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -1.0 -0.4666666666666667 -0.6296296296296298 -0.6829268292682924 -0.9195046439628487 -0.7222222222222223 -0.7714285714285714 -0.9428571428571431 -0.4328358208955223 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 -0.5333333333333333 -0.07407407407407396 -0.5609756097560974 -0.541795665634675 -0.6111111111111112 -0.4285714285714288 -0.6000000000000001 0.25373134328358193 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -1.0 -0.06666666666666665 -0.2592592592592595 -0.6585365853658536 -0.8761609907120742 -0.7222222222222223 -0.7142857142857144 -0.8857142857142857 0.10447761194029859 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 -0.5333333333333333 0.8888888888888888 0.21951219512195141 0.44891640866873006 0.4444444444444442 0.37142857142857144 0.4285714285714284 0.5820895522388059 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.8181818181818182 -0.4 -0.18518518518518512 -0.3414634146341462 -0.7337461300309598 -0.6111111111111112 -0.7714285714285714 -0.7142857142857144 -0.9402985074626865 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.9999999999999998 -0.8666666666666667 -0.5925925925925926 -0.6341463414634145 -0.8823529411764706 -0.9444444444444446 -0.657142857142857 -0.8285714285714287 -0.10447761194029859 -1.0 -1.0 1.0 
-1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.7333333333333334 0.11111111111111116 -0.43902439024390216 -0.3312693498452015 -0.44444444444444464 -0.20000000000000018 -0.37142857142857144 -0.6716417910447761 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.2727272727272727 -0.6 0.6296296296296293 -0.09756097560975596 0.9876160990712068 0.833333333333333 0.8285714285714287 0.8857142857142852 0.9999999999999998 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 0.0 -0.18518518518518512 -0.5121951219512193 -0.653250773993808 -0.5 -0.7142857142857144 -0.7142857142857144 -0.8208955223880596 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.4545454545454546 -0.4 0.37037037037037024 -0.04878048780487787 0.6842105263157889 0.6111111111111112 0.7714285714285709 0.7142857142857144 0.880597014925373 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 0.8666666666666667 0.7777777777777777 0.21951219512195141 0.9938080495356032 1.0 0.9428571428571426 1.0 0.4626865671641791 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.2727272727272727 -0.8666666666666667 0.5555555555555554 -0.07317073170731692 0.9938080495356032 1.0 0.9999999999999996 0.8857142857142852 0.9402985074626866 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.9999999999999998 -1.0 -0.37037037037037046 -0.5853658536585364 -0.8575851393188856 -0.8333333333333335 -0.8857142857142857 -0.7142857142857144 -0.6716417910447761 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.4545454545454546 0.6000000000000001 0.07407407407407396 -0.21951219512195097 0.39318885448916374 0.2777777777777777 0.4857142857142853 0.31428571428571406 0.014925373134328401 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 0.46666666666666656 -0.4814814814814816 -0.7560975609756095 -1.0 -0.7777777777777777 -0.8285714285714287 -1.0 -0.791044776119403 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 -0.9333333333333333 -0.37037037037037046 -0.7317073170731705 -0.8699690402476778 -0.7222222222222223 -0.7142857142857144 -0.7142857142857144 -0.6119402985074627 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 -0.19999999999999996 0.0 -0.24390243902439002 -0.04024767801857587 0.05555555555555536 -0.08571428571428585 -0.08571428571428585 -0.13432835820895517 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.09090909090909083 0.46666666666666656 0.7037037037037037 -0.12195121951219501 0.9752321981424141 0.8888888888888884 0.9999999999999996 0.8285714285714283 0.7313432835820894 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 -0.19999999999999996 0.11111111111111116 -0.14634146341463405 0.4984520123839 0.38888888888888884 0.4857142857142853 0.4285714285714284 0.5223880597014927 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 -0.06666666666666665 -0.2592592592592595 -0.43902439024390216 -0.7027863777089784 -0.7222222222222223 -0.657142857142857 -0.7142857142857144 -0.25373134328358216 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.2727272727272727 -0.5333333333333333 0.37037037037037024 -0.09756097560975596 0.9814241486068105 0.9444444444444446 0.8285714285714287 0.8857142857142852 0.10447761194029859 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 -0.2666666666666666 0.4444444444444442 -0.04878048780487787 0.8761609907120742 0.833333333333333 0.7714285714285709 0.8857142857142852 0.28358208955223874 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.6363636363636365 -0.8666666666666667 0.03703703703703676 -0.26829268292682906 0.20123839009287892 0.22222222222222232 0.2571428571428571 0.19999999999999973 -0.3731343283582089 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 
+-1.0 -0.4545454545454546 0.1333333333333333 0.22222222222222188 0.024390243902439268 -0.1517027863777094 -0.22222222222222232 -0.2571428571428571 -0.2571428571428571 -0.6716417910447761 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 0.6000000000000001 -0.07407407407407396 -0.41463414634146334 -0.6408668730650158 -0.5555555555555558 -0.7142857142857144 -0.7142857142857144 -0.791044776119403 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -1.0 -0.9333333333333333 -0.6666666666666667 -0.7560975609756095 -0.9628482972136223 -1.0 -0.7714285714285714 -1.0 -0.014925373134328401 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 0.33333333333333326 -0.40740740740740744 -0.5609756097560974 -0.6656346749226008 -0.5555555555555558 -0.5428571428571431 -0.5428571428571427 -0.4626865671641791 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 0.5333333333333334 -0.18518518518518512 -0.5609756097560974 -0.3931888544891642 -0.2777777777777777 -0.4285714285714288 -0.3142857142857145 0.31343283582089554 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.09090909090909083 0.7333333333333334 0.8518518518518516 0.07317073170731736 0.9938080495356032 0.7777777777777777 0.8285714285714287 1.0 0.5223880597014927 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.8181818181818182 -0.8 -0.40740740740740744 -0.6585365853658536 -0.7585139318885452 -0.8333333333333335 -0.5428571428571431 -0.6000000000000001 -0.5223880597014925 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.6363636363636365 -0.6 0.14814814814814792 -0.31707317073170715 0.10216718266253855 0.16666666666666652 0.19999999999999973 0.1428571428571428 0.4925373134328359 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 -0.8 0.03703703703703676 -0.17073170731707288 -0.2941176470588238 -0.2777777777777777 -0.2571428571428571 -0.3142857142857145 -0.4925373134328359 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 0.5333333333333334 -0.40740740740740744 -0.3902439024390243 -0.6470588235294121 -0.6666666666666665 -0.6000000000000001 -0.657142857142857 -0.4626865671641791 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.6363636363636365 -0.5333333333333333 0.03703703703703676 -0.2926829268292681 0.07739938080495312 0.05555555555555536 0.08571428571428541 -0.02857142857142847 0.34328358208955234 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 -0.06666666666666665 0.4814814814814814 0.07317073170731736 0.6037151702786372 0.38888888888888884 0.714285714285714 0.4285714285714284 0.07462686567164178 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -1.0 -0.1333333333333333 -0.4814814814814816 -0.5121951219512193 -0.8823529411764706 -0.8888888888888888 -0.9428571428571431 -0.8857142857142857 -0.10447761194029859 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 -1.0 0.22222222222222188 -0.07317073170731692 -0.32507739938080515 -0.2777777777777777 -0.2571428571428571 -0.37142857142857144 -0.6119402985074627 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.06666666666666665 0.0 -0.3902439024390243 -0.13312693498452033 -0.11111111111111116 -0.02857142857142847 -0.1428571428571428 0.014925373134328401 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 0.1333333333333333 0.33333333333333304 -0.21951219512195097 0.5541795665634672 0.4444444444444442 0.657142857142857 0.4285714285714284 0.4626865671641791 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 0.6666666666666667 -0.7777777777777779 -0.8292682926829267 -0.9938080495356036 -0.7777777777777777 -0.8857142857142857 -0.8857142857142857 -0.10447761194029859 -1.0 
1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 0.06666666666666665 -0.18518518518518512 -0.3902439024390243 0.23219814241486025 0.16666666666666652 0.08571428571428541 0.19999999999999973 -0.19402985074626855 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.8181818181818181 0.2666666666666666 -0.2592592592592595 -0.4634146341463412 -0.7089783281733748 -0.6666666666666665 -0.5428571428571431 -0.7714285714285714 -0.9402985074626865 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 0.1333333333333333 -1.0 -0.9999999999999999 -0.9938080495356036 -0.8333333333333335 -1.0 -0.8857142857142857 -0.7611940298507462 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 0.06666666666666665 0.33333333333333304 -0.024390243902438824 0.5603715170278636 0.38888888888888884 0.5428571428571431 0.48571428571428577 0.6716417910447763 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 0.6666666666666667 -0.11111111111111116 -0.36585365853658525 -0.6284829721362231 -0.6111111111111112 -0.4285714285714288 -0.48571428571428577 0.10447761194029859 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.8181818181818182 0.3999999999999999 -0.3333333333333335 -0.6097560975609755 -0.6594427244582044 -0.7222222222222223 -0.7142857142857144 -0.657142857142857 -0.07462686567164178 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 0.6666666666666667 0.2592592592592591 -0.12195121951219501 0.6594427244582044 0.4444444444444442 0.6000000000000001 0.5999999999999996 0.25373134328358193 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 -0.33333333333333337 0.33333333333333304 -0.04878048780487787 0.8699690402476778 0.833333333333333 0.8857142857142857 0.7714285714285714 0.28358208955223874 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.9999999999999998 0.9333333333333333 -0.5185185185185186 -0.6829268292682924 -0.9814241486068114 -0.8333333333333335 -1.0 -1.0 -0.5820895522388059 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.09090909090909083 -0.4666666666666667 0.22222222222222188 -0.04878048780487787 0.8452012383900933 0.6111111111111112 0.8857142857142857 0.8285714285714283 -0.04477611940298498 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 0.33333333333333326 0.2962962962962963 2.220446049250313e-16 0.5913312693498445 0.5 0.657142857142857 0.5428571428571427 -0.13432835820895517 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.6363636363636365 -0.9333333333333333 -0.2962962962962963 -0.43902439024390216 -0.5975232198142417 -0.6666666666666665 -0.5428571428571431 -0.5428571428571427 -0.7313432835820894 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.8181818181818182 0.2666666666666666 -0.3333333333333335 -0.6097560975609755 -0.6718266253869971 -0.6111111111111112 -0.48571428571428577 -0.6000000000000001 -0.5522388059701493 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 -0.4666666666666667 0.18518518518518512 -0.09756097560975596 0.702786377708978 0.5 0.7714285714285709 0.5428571428571427 0.4925373134328359 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.4545454545454546 0.6666666666666667 0.18518518518518512 2.220446049250313e-16 0.3684210526315792 0.2777777777777777 0.31428571428571406 0.4285714285714284 0.37313432835820914 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 0.3999999999999999 -0.37037037037037046 -0.5853658536585364 -0.8266253869969042 -0.6666666666666665 -0.8857142857142857 -0.7714285714285714 -0.04477611940298498 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 0.7333333333333334 -0.37037037037037046 -0.5609756097560974 -0.8080495356037152 -0.6111111111111112 
-0.8285714285714287 -0.7142857142857144 -0.25373134328358216 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 -0.2666666666666666 0.18518518518518512 -0.26829268292682906 0.4860681114551073 0.33333333333333304 0.5428571428571431 0.48571428571428577 0.6417910447761195 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 0.2666666666666666 -0.0370370370370372 -0.43902439024390216 -0.195046439628483 -0.0555555555555558 -0.2571428571428571 -0.20000000000000018 -0.5522388059701493 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.09090909090909083 -0.19999999999999996 0.4444444444444442 0.024390243902439268 0.888544891640866 0.833333333333333 0.8285714285714287 0.8285714285714283 -0.16417910447761197 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.8181818181818181 -0.6 -0.14814814814814836 -0.36585365853658525 -0.51702786377709 -0.44444444444444464 -0.4285714285714288 -0.6000000000000001 -0.791044776119403 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.6363636363636365 -1.0 0.14814814814814792 -0.21951219512195097 0.2507739938080493 0.2777777777777777 0.37142857142857144 0.1428571428571428 -0.22388059701492535 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 0.8181818181818181 0.3999999999999999 -0.2592592592592595 -0.5365853658536583 -0.7399380804953566 -0.7222222222222223 -0.5428571428571431 -0.7142857142857144 -0.10447761194029859 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.09090909090909105 -1.0 0.33333333333333304 0.07317073170731736 0.3808049535603719 0.33333333333333304 0.31428571428571406 0.2571428571428571 -0.10447761194029859 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 -0.2727272727272727 -0.19999999999999996 0.7037037037037037 0.024390243902439268 0.1888544891640862 0.22222222222222232 0.19999999999999973 0.2571428571428571 0.16417910447761197 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.09090909090909105 -0.8666666666666667 0.4814814814814814 -0.12195121951219501 0.3993808049535601 0.2777777777777777 0.4285714285714284 0.2571428571428571 -0.19402985074626855 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 -0.2666666666666666 -0.11111111111111116 -0.43902439024390216 -0.21981424148606798 -0.2777777777777777 -0.2571428571428571 -0.2571428571428571 -0.014925373134328401 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.6363636363636365 0.9999999999999998 0.07407407407407396 -0.19512195121951192 -0.3312693498452015 -0.22222222222222232 -0.2571428571428571 -0.3142857142857145 -0.16417910447761197 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.9999999999999998 -0.1333333333333333 -0.7037037037037037 -0.8780487804878048 -0.9814241486068114 -0.7777777777777777 -0.8857142857142857 -0.9428571428571431 -0.3731343283582089 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.2727272727272727 -0.7333333333333334 0.4814814814814814 0.09756097560975618 0.9938080495356032 0.8888888888888884 0.9999999999999996 0.9428571428571431 0.28358208955223874 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.2727272727272727 -0.8 0.9259259259259256 -0.04878048780487787 0.06501547987616041 0.0 0.08571428571428541 0.1428571428571428 -0.014925373134328401 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 +-1.0 0.9999999999999998 0.9999999999999998 -0.5185185185185186 -0.4634146341463412 -0.975232198142415 -0.9444444444444446 -0.8857142857142857 -0.8285714285714287 -0.13432835820895517 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -1.0 0.33333333333333326 -0.5185185185185186 -0.5853658536585364 -0.8328173374613006 -0.8888888888888888 -0.7142857142857144 -0.8857142857142857 -0.13432835820895517 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 +-1.0 0.09090909090909083 -0.4 
0.4444444444444442 -0.12195121951219501 0.8575851393188851 0.6666666666666665 0.7714285714285709 0.8285714285714283 0.9999999999999998 -1.0 -1.0 -1.0 1.0 -1.0 -1.0 -1.0 +-1.0 -0.6363636363636365 -0.06666666666666665 -0.2962962962962963 -0.6585365853658536 -0.47368421052631593 -0.5555555555555558 -0.3142857142857145 -0.5428571428571427 0.25373134328358193 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 -0.4545454545454546 0.19999999999999996 0.5555555555555554 0.3170731707317076 -0.13931888544891669 0.0 -0.02857142857142847 -0.1428571428571428 -0.014925373134328401 -1.0 -1.0 -1.0 -1.0 -1.0 1.0 -1.0 +-1.0 0.6363636363636365 -0.1333333333333333 0.14814814814814792 -0.3902439024390243 -0.06501547987616085 -0.16666666666666696 0.02857142857142847 -0.08571428571428585 0.4925373134328359 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 +-1.0 -0.4545454545454546 -0.06666666666666665 -0.11111111111111116 -0.41463414634146334 -0.18266253869969074 -0.0555555555555558 -0.1428571428571428 -0.08571428571428585 -0.6417910447761195 1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/regression_groups.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/regression_groups.tabular Fri Aug 09 07:12:16 2019 -0400 @@ -0,0 +1,262 @@ +groups +train +train +train +train +test +test +train +train +validation +train +train +train +train +train +validation +validation +train +train +train +test +test +validation +train +validation +test +validation +train +train +train +test +test +test +train +test +train +train +train +test +train +train +train +train +test +train +train +train +train +train +train +train +train +train +test +test +validation +train +validation +train +train +train +train +test +train +train +validation +validation +train +train +train +train +validation +test +test +train +train +train +train +train +train +train +validation +train +train +train +train +test +train +validation +train +test +test +test +train +train +train +test +train +train +train +train +train +train +train +train +train +train +validation +train +train +train +train +validation +train +validation +train +validation +validation +train +validation +train +test +train +train +train +train +test +validation +test +train +train +train +train +test +train +train +train +test +validation +train +train +train +train +train +validation +test +train +train +test +train +train +validation +train +train +train +train +train +test +test +validation +train +test +train +validation +train +train +train +test +train +train +train +train +train +train +validation +train +train +train +train +validation +test +train +train +train +validation +train +test +test +validation +train +validation +validation +test +test +test +train +train +test +train +train +validation +test +test +train +train +train +test +test +train +train +train +train +train +test +train +train +test +validation +test +train +train +test +train +train +train +validation +train +validation +train +validation +train +train +train +validation +validation +test +validation +train +test +train +validation +train +train +test +train +train +test +test +train +validation +train +train +train +train +train +train +train +train +validation +train +test +train diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/searchCV01 Binary file test-data/searchCV01 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/searchCV02 Binary file test-data/searchCV02 has changed diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/train_test_eval01.tabular --- /dev/null Thu Jan 01 
00:00:00 1970 +0000
+++ b/test-data/train_test_eval01.tabular Fri Aug 09 07:12:16 2019 -0400
@@ -0,0 +1,2 @@
+neg_mean_absolute_error	r2
+-5.29904520286704	0.6841931628349759
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/train_test_eval03.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/train_test_eval03.tabular Fri Aug 09 07:12:16 2019 -0400
@@ -0,0 +1,2 @@
+neg_mean_absolute_error	r2
+-4.811320754716981	0.7343422874316201
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/train_test_eval_model01
Binary file test-data/train_test_eval_model01 has changed
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/train_test_eval_weights01.h5
Binary file test-data/train_test_eval_weights01.h5 has changed
diff -r 21d3e08b1a48 -r 82b6104d4682 test-data/train_test_eval_weights02.h5
Binary file test-data/train_test_eval_weights02.h5 has changed
diff -r 21d3e08b1a48 -r 82b6104d4682 train_test_eval.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/train_test_eval.py Fri Aug 09 07:12:16 2019 -0400
@@ -0,0 +1,433 @@
+import argparse
+import joblib
+import json
+import numpy as np
+import pandas as pd
+import pickle
+import warnings
+from itertools import chain
+from scipy.io import mmread
+from sklearn.base import clone
+from sklearn import (cluster, compose, decomposition, ensemble,
+                     feature_extraction, feature_selection,
+                     gaussian_process, kernel_approximation, metrics,
+                     model_selection, naive_bayes, neighbors,
+                     pipeline, preprocessing, svm, linear_model,
+                     tree, discriminant_analysis)
+from sklearn.exceptions import FitFailedWarning
+from sklearn.metrics.scorer import _check_multimetric_scoring
+from sklearn.model_selection._validation import _score, cross_validate
+from sklearn.model_selection import _search, _validation
+from sklearn.utils import indexable, safe_indexing
+
+from galaxy_ml.model_validations import train_test_split
+from galaxy_ml.utils import (SafeEval, get_scoring, load_model,
+                             read_columns, try_get_attr, get_module)
+
+
+_fit_and_score = try_get_attr('galaxy_ml.model_validations', '_fit_and_score')
+setattr(_search, '_fit_and_score', _fit_and_score)
+setattr(_validation, '_fit_and_score', _fit_and_score)
+
+N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
+CACHE_DIR = './cached'
+NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', '_path',
+                  'nthread', 'callbacks')
+ALLOWED_CALLBACKS = ('EarlyStopping', 'TerminateOnNaN', 'ReduceLROnPlateau',
+                     'CSVLogger', 'None')
+
+
+def _eval_swap_params(params_builder):
+    swap_params = {}
+
+    for p in params_builder['param_set']:
+        swap_value = p['sp_value'].strip()
+        if swap_value == '':
+            continue
+
+        param_name = p['sp_name']
+        if param_name.lower().endswith(NON_SEARCHABLE):
+            warnings.warn("Warning: `%s` is not eligible for search and was "
+                          "omitted!" % param_name)
+            continue
+
+        if not swap_value.startswith(':'):
+            safe_eval = SafeEval(load_scipy=True, load_numpy=True)
+            ev = safe_eval(swap_value)
+        else:
+            # a leading `:` before the value asks for estimator evaluation
+            safe_eval_es = SafeEval(load_estimators=True)
+            swap_value = swap_value[1:].strip()
+            # TODO: maybe add a regular expression check
+            ev = safe_eval_es(swap_value)
+
+        swap_params[param_name] = ev
+
+    return swap_params
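A minimal sketch of the `params_builder` structure that `_eval_swap_params` consumes; the parameter names and values below are hypothetical, and the `sklearn_preprocessing` symbol is assumed to be exposed by `SafeEval(load_estimators=True)`:

    # Each param_set entry carries a parameter name and a value to evaluate.
    params_builder = {
        'param_set': [
            # plain literal, evaluated with scipy/numpy symbols loaded
            {'sp_name': 'randomforestregressor__n_estimators',
             'sp_value': '100'},
            # a leading ':' switches to estimator evaluation
            {'sp_name': 'powertransformer',
             'sp_value': ': sklearn_preprocessing.PowerTransformer()'},
        ]
    }
    swap_params = _eval_swap_params(params_builder)
    # estimator.set_params(**swap_params) then applies the swaps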
+ """ + nones = [] + new_arrays = [] + for idx, arr in enumerate(arrays): + if arr is None: + nones.append(idx) + else: + new_arrays.append(arr) + + if kwargs['shuffle'] == 'None': + kwargs['shuffle'] = None + + group_names = kwargs.pop('group_names', None) + + if group_names is not None and group_names.strip(): + group_names = [name.strip() for name in + group_names.split(',')] + new_arrays = indexable(*new_arrays) + groups = kwargs['labels'] + n_samples = new_arrays[0].shape[0] + index_arr = np.arange(n_samples) + test = index_arr[np.isin(groups, group_names)] + train = index_arr[~np.isin(groups, group_names)] + rval = list(chain.from_iterable( + (safe_indexing(a, train), + safe_indexing(a, test)) for a in new_arrays)) + else: + rval = train_test_split(*new_arrays, **kwargs) + + for pos in nones: + rval[pos * 2: 2] = [None, None] + + return rval + + +def main(inputs, infile_estimator, infile1, infile2, + outfile_result, outfile_object=None, + outfile_weights=None, groups=None, + ref_seq=None, intervals=None, targets=None, + fasta_path=None): + """ + Parameter + --------- + inputs : str + File path to galaxy tool parameter + + infile_estimator : str + File path to estimator + + infile1 : str + File path to dataset containing features + + infile2 : str + File path to dataset containing target values + + outfile_result : str + File path to save the results, either cv_results or test result + + outfile_object : str, optional + File path to save searchCV object + + outfile_weights : str, optional + File path to save deep learning model weights + + groups : str + File path to dataset containing groups labels + + ref_seq : str + File path to dataset containing genome sequence file + + intervals : str + File path to dataset containing interval file + + targets : str + File path to dataset compressed target bed file + + fasta_path : str + File path to dataset containing fasta file + """ + warnings.simplefilter('ignore') + + with open(inputs, 'r') as param_handler: + params = json.load(param_handler) + + # load estimator + with open(infile_estimator, 'rb') as estimator_handler: + estimator = load_model(estimator_handler) + + # swap hyperparameter + swapping = params['experiment_schemes']['hyperparams_swapping'] + swap_params = _eval_swap_params(swapping) + estimator.set_params(**swap_params) + + estimator_params = estimator.get_params() + + # store read dataframe object + loaded_df = {} + + input_type = params['input_options']['selected_input'] + # tabular input + if input_type == 'tabular': + header = 'infer' if params['input_options']['header1'] else None + column_option = (params['input_options']['column_selector_options_1'] + ['selected_column_selector_option']) + if column_option in ['by_index_number', 'all_but_by_index_number', + 'by_header_name', 'all_but_by_header_name']: + c = params['input_options']['column_selector_options_1']['col1'] + else: + c = None + + df_key = infile1 + repr(header) + df = pd.read_csv(infile1, sep='\t', header=header, + parse_dates=True) + loaded_df[df_key] = df + + X = read_columns(df, c=c, c_option=column_option).astype(float) + # sparse input + elif input_type == 'sparse': + X = mmread(open(infile1, 'r')) + + # fasta_file input + elif input_type == 'seq_fasta': + pyfaidx = get_module('pyfaidx') + sequences = pyfaidx.Fasta(fasta_path) + n_seqs = len(sequences.keys()) + X = np.arange(n_seqs)[:, np.newaxis] + for param in estimator_params.keys(): + if param.endswith('fasta_path'): + estimator.set_params( + **{param: fasta_path}) + break + else: + raise ValueError( 
+ "The selected estimator doesn't support " + "fasta file input! Please consider using " + "KerasGBatchClassifier with " + "FastaDNABatchGenerator/FastaProteinBatchGenerator " + "or having GenomeOneHotEncoder/ProteinOneHotEncoder " + "in pipeline!") + + elif input_type == 'refseq_and_interval': + path_params = { + 'data_batch_generator__ref_genome_path': ref_seq, + 'data_batch_generator__intervals_path': intervals, + 'data_batch_generator__target_path': targets + } + estimator.set_params(**path_params) + n_intervals = sum(1 for line in open(intervals)) + X = np.arange(n_intervals)[:, np.newaxis] + + # Get target y + header = 'infer' if params['input_options']['header2'] else None + column_option = (params['input_options']['column_selector_options_2'] + ['selected_column_selector_option2']) + if column_option in ['by_index_number', 'all_but_by_index_number', + 'by_header_name', 'all_but_by_header_name']: + c = params['input_options']['column_selector_options_2']['col2'] + else: + c = None + + df_key = infile2 + repr(header) + if df_key in loaded_df: + infile2 = loaded_df[df_key] + else: + infile2 = pd.read_csv(infile2, sep='\t', + header=header, parse_dates=True) + loaded_df[df_key] = infile2 + + y = read_columns( + infile2, + c=c, + c_option=column_option, + sep='\t', + header=header, + parse_dates=True) + if len(y.shape) == 2 and y.shape[1] == 1: + y = y.ravel() + if input_type == 'refseq_and_interval': + estimator.set_params( + data_batch_generator__features=y.ravel().tolist()) + y = None + # end y + + # load groups + if groups: + groups_selector = (params['experiment_schemes']['test_split'] + ['split_algos']).pop('groups_selector') + + header = 'infer' if groups_selector['header_g'] else None + column_option = \ + (groups_selector['column_selector_options_g'] + ['selected_column_selector_option_g']) + if column_option in ['by_index_number', 'all_but_by_index_number', + 'by_header_name', 'all_but_by_header_name']: + c = groups_selector['column_selector_options_g']['col_g'] + else: + c = None + + df_key = groups + repr(header) + if df_key in loaded_df: + groups = loaded_df[df_key] + + groups = read_columns( + groups, + c=c, + c_option=column_option, + sep='\t', + header=header, + parse_dates=True) + groups = groups.ravel() + + # del loaded_df + del loaded_df + + # handle memory + memory = joblib.Memory(location=CACHE_DIR, verbose=0) + # cache iraps_core fits could increase search speed significantly + if estimator.__class__.__name__ == 'IRAPSClassifier': + estimator.set_params(memory=memory) + else: + # For iraps buried in pipeline + new_params = {} + for p, v in estimator_params.items(): + if p.endswith('memory'): + # for case of `__irapsclassifier__memory` + if len(p) > 8 and p[:-8].endswith('irapsclassifier'): + # cache iraps_core fits could increase search + # speed significantly + new_params[p] = memory + # security reason, we don't want memory being + # modified unexpectedly + elif v: + new_params[p] = None + # handle n_jobs + elif p.endswith('n_jobs'): + # For now, 1 CPU is suggested for iprasclassifier + if len(p) > 8 and p[:-8].endswith('irapsclassifier'): + new_params[p] = 1 + else: + new_params[p] = N_JOBS + # for security reason, types of callback are limited + elif p.endswith('callbacks'): + for cb in v: + cb_type = cb['callback_selection']['callback_type'] + if cb_type not in ALLOWED_CALLBACKS: + raise ValueError( + "Prohibited callback type: %s!" 
+
+    # handle scorer, convert to scorer dict
+    scoring = params['experiment_schemes']['metrics']['scoring']
+    scorer = get_scoring(scoring)
+    scorer, _ = _check_multimetric_scoring(estimator, scoring=scorer)
+
+    # handle test (first) split
+    test_split_options = (params['experiment_schemes']
+                          ['test_split']['split_algos'])
+
+    if test_split_options['shuffle'] == 'group':
+        test_split_options['labels'] = groups
+    if test_split_options['shuffle'] == 'stratified':
+        if y is not None:
+            test_split_options['labels'] = y
+        else:
+            raise ValueError("Stratified shuffle split is not "
+                             "applicable to empty target values!")
+
+    X_train, X_test, y_train, y_test, groups_train, groups_test = \
+        train_test_split_none(X, y, groups, **test_split_options)
+
+    exp_scheme = params['experiment_schemes']['selected_exp_scheme']
+
+    # handle validation (second) split
+    if exp_scheme == 'train_val_test':
+        val_split_options = (params['experiment_schemes']
+                             ['val_split']['split_algos'])
+
+        if val_split_options['shuffle'] == 'group':
+            val_split_options['labels'] = groups_train
+        if val_split_options['shuffle'] == 'stratified':
+            if y_train is not None:
+                val_split_options['labels'] = y_train
+            else:
+                raise ValueError("Stratified shuffle split is not "
+                                 "applicable to empty target values!")
+
+        X_train, X_val, y_train, y_val, groups_train, groups_val = \
+            train_test_split_none(X_train, y_train, groups_train,
+                                  **val_split_options)
+
+    # train and eval
+    if hasattr(estimator, 'validation_data'):
+        if exp_scheme == 'train_val_test':
+            estimator.fit(X_train, y_train,
+                          validation_data=(X_val, y_val))
+        else:
+            estimator.fit(X_train, y_train,
+                          validation_data=(X_test, y_test))
+    else:
+        estimator.fit(X_train, y_train)
+
+    if hasattr(estimator, 'evaluate'):
+        scores = estimator.evaluate(X_test, y_test=y_test,
+                                    scorer=scorer,
+                                    is_multimetric=True)
+    else:
+        scores = _score(estimator, X_test, y_test, scorer,
+                        is_multimetric=True)
+    # handle output
+    for name, score in scores.items():
+        scores[name] = [score]
+    df = pd.DataFrame(scores)
+    df = df[sorted(df.columns)]
+    df.to_csv(path_or_buf=outfile_result, sep='\t',
+              header=True, index=False)
+
+    memory.clear(warn=False)
+
+    if outfile_object:
+        main_est = estimator
+        if isinstance(estimator, pipeline.Pipeline):
+            main_est = estimator.steps[-1][-1]
+
+        if hasattr(main_est, 'model_') \
+                and hasattr(main_est, 'save_weights'):
+            if outfile_weights:
+                main_est.save_weights(outfile_weights)
+            del main_est.model_
+            del main_est.fit_params
+            del main_est.model_class_
+            del main_est.validation_data
+            if getattr(main_est, 'data_generator_', None):
+                del main_est.data_generator_
+                del main_est.data_batch_generator
+
+        with open(outfile_object, 'wb') as output_handler:
+            pickle.dump(estimator, output_handler,
+                        pickle.HIGHEST_PROTOCOL)
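A condensed sketch of the `train_val_test` scheme above: two passes of `train_test_split_none` carve off the test set first and the validation set second (hypothetical sizes; the `'simple'` shuffle value follows the tool's option names, and the custom `train_test_split` in `galaxy_ml.model_validations` is assumed to accept it):

    X_train, X_test, y_train, y_test, _, _ = train_test_split_none(
        X, y, None, test_size=0.2, shuffle='simple', random_state=10)
    X_train, X_val, y_train, y_val, _, _ = train_test_split_none(
        X_train, y_train, None, test_size=0.25, shuffle='simple',
        random_state=10)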
"--intervals", dest="intervals") + aparser.add_argument("-t", "--targets", dest="targets") + aparser.add_argument("-f", "--fasta_path", dest="fasta_path") + args = aparser.parse_args() + + main(args.inputs, args.infile_estimator, args.infile1, args.infile2, + args.outfile_result, outfile_object=args.outfile_object, + outfile_weights=args.outfile_weights, groups=args.groups, + ref_seq=args.ref_seq, intervals=args.intervals, + targets=args.targets, fasta_path=args.fasta_path) diff -r 21d3e08b1a48 -r 82b6104d4682 utils.py --- a/utils.py Tue Jul 09 19:26:54 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,599 +0,0 @@ -import ast -import json -import imblearn -import numpy as np -import pandas -import pickle -import re -import scipy -import sklearn -import skrebate -import sys -import warnings -import xgboost - -from collections import Counter -from asteval import Interpreter, make_symbol_table -from imblearn import under_sampling, over_sampling, combine -from imblearn.pipeline import Pipeline as imbPipeline -from mlxtend import regressor, classifier -from scipy.io import mmread -from sklearn import ( - cluster, compose, decomposition, ensemble, feature_extraction, - feature_selection, gaussian_process, kernel_approximation, metrics, - model_selection, naive_bayes, neighbors, pipeline, preprocessing, - svm, linear_model, tree, discriminant_analysis) - -try: - import iraps_classifier -except ImportError: - pass - -try: - import model_validations -except ImportError: - pass - -try: - import feature_selectors -except ImportError: - pass - -try: - import preprocessors -except ImportError: - pass - -# handle pickle white list file -WL_FILE = __import__('os').path.join( - __import__('os').path.dirname(__file__), 'pk_whitelist.json') - -N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) - - -class _SafePickler(pickle.Unpickler, object): - """ - Used to safely deserialize scikit-learn model objects - Usage: - eg.: _SafePickler.load(pickled_file_object) - """ - def __init__(self, file): - super(_SafePickler, self).__init__(file) - # load global white list - with open(WL_FILE, 'r') as f: - self.pk_whitelist = json.load(f) - - self.bad_names = ( - 'and', 'as', 'assert', 'break', 'class', 'continue', - 'def', 'del', 'elif', 'else', 'except', 'exec', - 'finally', 'for', 'from', 'global', 'if', 'import', - 'in', 'is', 'lambda', 'not', 'or', 'pass', 'print', - 'raise', 'return', 'try', 'system', 'while', 'with', - 'True', 'False', 'None', 'eval', 'execfile', '__import__', - '__package__', '__subclasses__', '__bases__', '__globals__', - '__code__', '__closure__', '__func__', '__self__', '__module__', - '__dict__', '__class__', '__call__', '__get__', - '__getattribute__', '__subclasshook__', '__new__', - '__init__', 'func_globals', 'func_code', 'func_closure', - 'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame', - '__asteval__', 'f_locals', '__mro__') - - # unclassified good globals - self.good_names = [ - 'copy_reg._reconstructor', '__builtin__.object', - '__builtin__.bytearray', 'builtins.object', - 'builtins.bytearray', 'keras.engine.sequential.Sequential', - 'keras.engine.sequential.Model'] - - # custom module in Galaxy-ML - self.custom_modules = [ - '__main__', 'keras_galaxy_models', 'feature_selectors', - 'preprocessors', 'iraps_classifier', 'model_validations'] - - # override - def find_class(self, module, name): - # balack list first - if name in self.bad_names: - raise pickle.UnpicklingError("global '%s.%s' is forbidden" - % (module, name)) - - # custom module in Galaxy-ML 
diff -r 21d3e08b1a48 -r 82b6104d4682 utils.py
--- a/utils.py Tue Jul 09 19:26:54 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,599 +0,0 @@
-import ast
-import json
-import imblearn
-import numpy as np
-import pandas
-import pickle
-import re
-import scipy
-import sklearn
-import skrebate
-import sys
-import warnings
-import xgboost
-
-from collections import Counter
-from asteval import Interpreter, make_symbol_table
-from imblearn import under_sampling, over_sampling, combine
-from imblearn.pipeline import Pipeline as imbPipeline
-from mlxtend import regressor, classifier
-from scipy.io import mmread
-from sklearn import (
-    cluster, compose, decomposition, ensemble, feature_extraction,
-    feature_selection, gaussian_process, kernel_approximation, metrics,
-    model_selection, naive_bayes, neighbors, pipeline, preprocessing,
-    svm, linear_model, tree, discriminant_analysis)
-
-try:
-    import iraps_classifier
-except ImportError:
-    pass
-
-try:
-    import model_validations
-except ImportError:
-    pass
-
-try:
-    import feature_selectors
-except ImportError:
-    pass
-
-try:
-    import preprocessors
-except ImportError:
-    pass
-
-# handle pickle white list file
-WL_FILE = __import__('os').path.join(
-    __import__('os').path.dirname(__file__), 'pk_whitelist.json')
-
-N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
-
-
-class _SafePickler(pickle.Unpickler, object):
-    """
-    Used to safely deserialize scikit-learn model objects
-    Usage:
-        eg.: _SafePickler.load(pickled_file_object)
-    """
-    def __init__(self, file):
-        super(_SafePickler, self).__init__(file)
-        # load global white list
-        with open(WL_FILE, 'r') as f:
-            self.pk_whitelist = json.load(f)
-
-        self.bad_names = (
-            'and', 'as', 'assert', 'break', 'class', 'continue',
-            'def', 'del', 'elif', 'else', 'except', 'exec',
-            'finally', 'for', 'from', 'global', 'if', 'import',
-            'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
-            'raise', 'return', 'try', 'system', 'while', 'with',
-            'True', 'False', 'None', 'eval', 'execfile', '__import__',
-            '__package__', '__subclasses__', '__bases__', '__globals__',
-            '__code__', '__closure__', '__func__', '__self__', '__module__',
-            '__dict__', '__class__', '__call__', '__get__',
-            '__getattribute__', '__subclasshook__', '__new__',
-            '__init__', 'func_globals', 'func_code', 'func_closure',
-            'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame',
-            '__asteval__', 'f_locals', '__mro__')
-
-        # unclassified good globals
-        self.good_names = [
-            'copy_reg._reconstructor', '__builtin__.object',
-            '__builtin__.bytearray', 'builtins.object',
-            'builtins.bytearray', 'keras.engine.sequential.Sequential',
-            'keras.engine.sequential.Model']
-
-        # custom modules in Galaxy-ML
-        self.custom_modules = [
-            '__main__', 'keras_galaxy_models', 'feature_selectors',
-            'preprocessors', 'iraps_classifier', 'model_validations']
-
-    # override
-    def find_class(self, module, name):
-        # black list first
-        if name in self.bad_names:
-            raise pickle.UnpicklingError("global '%s.%s' is forbidden"
-                                         % (module, name))
-
-        # custom modules in Galaxy-ML
-        if module in self.custom_modules:
-            custom_module = sys.modules.get(module, None)
-            if custom_module:
-                return getattr(custom_module, name)
-            else:
-                raise pickle.UnpicklingError("Module '%s' is not imported"
-                                             % module)
-
-        # For objects from outside libraries, it's necessary to verify
-        # both module and name. Currently only a blacklist checker
-        # is working.
-        # TODO: replace with a whitelist checker.
-        good_names = self.good_names
-        pk_whitelist = self.pk_whitelist
-        if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
-            fullname = module + '.' + name
-            if (fullname in good_names)\
-                or (module.startswith(('sklearn.', 'xgboost.', 'skrebate.',
-                                       'imblearn.', 'mlxtend.', 'numpy.'))
-                    or module == 'numpy'):
-                if fullname not in (pk_whitelist['SK_NAMES'] +
-                                    pk_whitelist['SKR_NAMES'] +
-                                    pk_whitelist['XGB_NAMES'] +
-                                    pk_whitelist['NUMPY_NAMES'] +
-                                    pk_whitelist['IMBLEARN_NAMES'] +
-                                    pk_whitelist['MLXTEND_NAMES'] +
-                                    good_names):
-                    # raise pickle.UnpicklingError
-                    print("Warning: global %s is not in the pickler "
-                          "whitelist yet and will lose support soon. "
-                          "Contact the tool author or leave a message at "
-                          "github.com" % fullname)
-                mod = sys.modules[module]
-                return getattr(mod, name)
-
-        raise pickle.UnpicklingError("global '%s' is forbidden" % fullname)
-
-
-def load_model(file):
-    """Load a pickled object with `_SafePickler`
-    """
-    return _SafePickler(file).load()
-
-
-def read_columns(f, c=None, c_option='by_index_number',
-                 return_df=False, **args):
-    """Return an array from a tabular dataset by various column selections
-    """
-    data = pandas.read_csv(f, **args)
-    if c_option == 'by_index_number':
-        cols = list(map(lambda x: x - 1, c))
-        data = data.iloc[:, cols]
-    if c_option == 'all_but_by_index_number':
-        cols = list(map(lambda x: x - 1, c))
-        data.drop(data.columns[cols], axis=1, inplace=True)
-    if c_option == 'by_header_name':
-        cols = [e.strip() for e in c.split(',')]
-        data = data[cols]
-    if c_option == 'all_but_by_header_name':
-        cols = [e.strip() for e in c.split(',')]
-        data.drop(cols, axis=1, inplace=True)
-    y = data.values
-    if return_df:
-        return y, data
-    else:
-        return y
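A brief usage sketch of `read_columns` (hypothetical tabular file with a header row; extra keyword arguments are forwarded to `pandas.read_csv`):

    # select two columns by header name
    X = read_columns('data.tabular', c='age, weight',
                     c_option='by_header_name', sep='\t', header=0)
    # or drop the first column by 1-based index
    X2 = read_columns('data.tabular', c=[1],
                      c_option='all_but_by_index_number', sep='\t', header=0)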
- """ - selector = inputs['selected_algorithm'] - if selector != 'DyRFECV': - selector = getattr(sklearn.feature_selection, selector) - options = inputs['options'] - - if inputs['selected_algorithm'] == 'SelectFromModel': - if not options['threshold'] or options['threshold'] == 'None': - options['threshold'] = None - else: - try: - options['threshold'] = float(options['threshold']) - except ValueError: - pass - if inputs['model_inputter']['input_mode'] == 'prefitted': - model_file = inputs['model_inputter']['fitted_estimator'] - with open(model_file, 'rb') as model_handler: - fitted_estimator = load_model(model_handler) - new_selector = selector(fitted_estimator, prefit=True, **options) - else: - estimator_json = inputs['model_inputter']['estimator_selector'] - estimator = get_estimator(estimator_json) - check_feature_importances = try_get_attr( - 'feature_selectors', 'check_feature_importances') - estimator = check_feature_importances(estimator) - new_selector = selector(estimator, **options) - - elif inputs['selected_algorithm'] == 'RFE': - step = options.get('step', None) - if step and step >= 1.0: - options['step'] = int(step) - estimator = get_estimator(inputs["estimator_selector"]) - check_feature_importances = try_get_attr( - 'feature_selectors', 'check_feature_importances') - estimator = check_feature_importances(estimator) - new_selector = selector(estimator, **options) - - elif inputs['selected_algorithm'] == 'RFECV': - options['scoring'] = get_scoring(options['scoring']) - options['n_jobs'] = N_JOBS - splitter, groups = get_cv(options.pop('cv_selector')) - if groups is None: - options['cv'] = splitter - else: - options['cv'] = list(splitter.split(X, y, groups=groups)) - step = options.get('step', None) - if step and step >= 1.0: - options['step'] = int(step) - estimator = get_estimator(inputs['estimator_selector']) - check_feature_importances = try_get_attr( - 'feature_selectors', 'check_feature_importances') - estimator = check_feature_importances(estimator) - new_selector = selector(estimator, **options) - - elif inputs['selected_algorithm'] == 'DyRFECV': - options['scoring'] = get_scoring(options['scoring']) - options['n_jobs'] = N_JOBS - splitter, groups = get_cv(options.pop('cv_selector')) - if groups is None: - options['cv'] = splitter - else: - options['cv'] = list(splitter.split(X, y, groups=groups)) - step = options.get('step') - if not step or step == 'None': - step = None - else: - step = ast.literal_eval(step) - options['step'] = step - estimator = get_estimator(inputs["estimator_selector"]) - check_feature_importances = try_get_attr( - 'feature_selectors', 'check_feature_importances') - estimator = check_feature_importances(estimator) - DyRFECV = try_get_attr('feature_selectors', 'DyRFECV') - - new_selector = DyRFECV(estimator, **options) - - elif inputs['selected_algorithm'] == 'VarianceThreshold': - new_selector = selector(**options) - - else: - score_func = inputs['score_func'] - score_func = getattr(sklearn.feature_selection, score_func) - new_selector = selector(score_func, **options) - - return new_selector - - -def get_X_y(params, file1, file2): - """Return machine learning inputs X, y from tabluar inputs - """ - input_type = (params['selected_tasks']['selected_algorithms'] - ['input_options']['selected_input']) - if input_type == 'tabular': - header = 'infer' if (params['selected_tasks']['selected_algorithms'] - ['input_options']['header1']) else None - column_option = (params['selected_tasks']['selected_algorithms'] - 
-
-
-def get_X_y(params, file1, file2):
-    """Return machine learning inputs X, y from tabular inputs
-    """
-    input_type = (params['selected_tasks']['selected_algorithms']
-                  ['input_options']['selected_input'])
-    if input_type == 'tabular':
-        header = 'infer' if (params['selected_tasks']['selected_algorithms']
-                             ['input_options']['header1']) else None
-        column_option = (params['selected_tasks']['selected_algorithms']
-                         ['input_options']['column_selector_options_1']
-                         ['selected_column_selector_option'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = (params['selected_tasks']['selected_algorithms']
-                 ['input_options']['column_selector_options_1']['col1'])
-        else:
-            c = None
-        X = read_columns(
-            file1,
-            c=c,
-            c_option=column_option,
-            sep='\t',
-            header=header,
-            parse_dates=True).astype(float)
-    else:
-        X = mmread(file1)
-
-    header = 'infer' if (params['selected_tasks']['selected_algorithms']
-                         ['input_options']['header2']) else None
-    column_option = (params['selected_tasks']['selected_algorithms']
-                     ['input_options']['column_selector_options_2']
-                     ['selected_column_selector_option2'])
-    if column_option in ['by_index_number', 'all_but_by_index_number',
-                         'by_header_name', 'all_but_by_header_name']:
-        c = (params['selected_tasks']['selected_algorithms']
-             ['input_options']['column_selector_options_2']['col2'])
-    else:
-        c = None
-    y = read_columns(
-        file2,
-        c=c,
-        c_option=column_option,
-        sep='\t',
-        header=header,
-        parse_dates=True)
-    y = y.ravel()
-
-    return X, y
-
-
-class SafeEval(Interpreter):
-    """Customized symbol table for safe literal eval
-    """
-    def __init__(self, load_scipy=False, load_numpy=False,
-                 load_estimators=False):
-
-        # File opening and other unneeded functions could be dropped
-        unwanted = ['open', 'type', 'dir', 'id', 'str', 'repr']
-
-        # Allowed symbol table. Add more if needed.
-        new_syms = {
-            'np_arange': getattr(np, 'arange'),
-            'ensemble_ExtraTreesClassifier':
-                getattr(ensemble, 'ExtraTreesClassifier')
-        }
-
-        syms = make_symbol_table(use_numpy=False, **new_syms)
-
-        if load_scipy:
-            scipy_distributions = scipy.stats.distributions.__dict__
-            for k, v in scipy_distributions.items():
-                if isinstance(v, (scipy.stats.rv_continuous,
-                                  scipy.stats.rv_discrete)):
-                    syms['scipy_stats_' + k] = v
-
-        if load_numpy:
-            from_numpy_random = [
-                'beta', 'binomial', 'bytes', 'chisquare', 'choice',
-                'dirichlet', 'division', 'exponential', 'f', 'gamma',
-                'geometric', 'gumbel', 'hypergeometric', 'laplace',
-                'logistic', 'lognormal', 'logseries', 'mtrand',
-                'multinomial', 'multivariate_normal', 'negative_binomial',
-                'noncentral_chisquare', 'noncentral_f', 'normal', 'pareto',
-                'permutation', 'poisson', 'power', 'rand', 'randint',
-                'randn', 'random', 'random_integers', 'random_sample',
-                'ranf', 'rayleigh', 'sample', 'seed', 'set_state',
-                'shuffle', 'standard_cauchy', 'standard_exponential',
-                'standard_gamma', 'standard_normal', 'standard_t',
-                'triangular', 'uniform', 'vonmises', 'wald', 'weibull', 'zipf']
-            for f in from_numpy_random:
-                syms['np_random_' + f] = getattr(np.random, f)
-
-        if load_estimators:
-            estimator_table = {
-                'sklearn_svm': getattr(sklearn, 'svm'),
-                'sklearn_tree': getattr(sklearn, 'tree'),
-                'sklearn_ensemble': getattr(sklearn, 'ensemble'),
-                'sklearn_neighbors': getattr(sklearn, 'neighbors'),
-                'sklearn_naive_bayes': getattr(sklearn, 'naive_bayes'),
-                'sklearn_linear_model': getattr(sklearn, 'linear_model'),
-                'sklearn_cluster': getattr(sklearn, 'cluster'),
-                'sklearn_decomposition': getattr(sklearn, 'decomposition'),
-                'sklearn_preprocessing': getattr(sklearn, 'preprocessing'),
-                'sklearn_feature_selection':
-                    getattr(sklearn, 'feature_selection'),
-                'sklearn_kernel_approximation':
-                    getattr(sklearn, 'kernel_approximation'),
-                'skrebate_ReliefF': getattr(skrebate, 'ReliefF'),
-                'skrebate_SURF': getattr(skrebate, 'SURF'),
-                'skrebate_SURFstar': getattr(skrebate, 'SURFstar'), -
'skrebate_MultiSURF': getattr(skrebate, 'MultiSURF'), - 'skrebate_MultiSURFstar': getattr(skrebate, 'MultiSURFstar'), - 'skrebate_TuRF': getattr(skrebate, 'TuRF'), - 'xgboost_XGBClassifier': getattr(xgboost, 'XGBClassifier'), - 'xgboost_XGBRegressor': getattr(xgboost, 'XGBRegressor'), - 'imblearn_over_sampling': getattr(imblearn, 'over_sampling'), - 'imblearn_combine': getattr(imblearn, 'combine') - } - syms.update(estimator_table) - - for key in unwanted: - syms.pop(key, None) - - super(SafeEval, self).__init__( - symtable=syms, use_numpy=False, minimal=False, - no_if=True, no_for=True, no_while=True, no_try=True, - no_functiondef=True, no_ifexp=True, no_listcomp=False, - no_augassign=False, no_assert=True, no_delete=True, - no_raise=True, no_print=True) - - -def get_estimator(estimator_json): - """Return a sklearn or compatible estimator from Galaxy tool inputs - """ - estimator_module = estimator_json['selected_module'] - - if estimator_module == 'custom_estimator': - c_estimator = estimator_json['c_estimator'] - with open(c_estimator, 'rb') as model_handler: - new_model = load_model(model_handler) - return new_model - - if estimator_module == "binarize_target": - wrapped_estimator = estimator_json['wrapped_estimator'] - with open(wrapped_estimator, 'rb') as model_handler: - wrapped_estimator = load_model(model_handler) - options = {} - if estimator_json['z_score'] is not None: - options['z_score'] = estimator_json['z_score'] - if estimator_json['value'] is not None: - options['value'] = estimator_json['value'] - options['less_is_positive'] = estimator_json['less_is_positive'] - if estimator_json['clf_or_regr'] == 'BinarizeTargetClassifier': - klass = try_get_attr('iraps_classifier', - 'BinarizeTargetClassifier') - else: - klass = try_get_attr('iraps_classifier', - 'BinarizeTargetRegressor') - return klass(wrapped_estimator, **options) - - estimator_cls = estimator_json['selected_estimator'] - - if estimator_module == 'xgboost': - klass = getattr(xgboost, estimator_cls) - else: - module = getattr(sklearn, estimator_module) - klass = getattr(module, estimator_cls) - - estimator = klass() - - estimator_params = estimator_json['text_params'].strip() - if estimator_params != '': - try: - safe_eval = SafeEval() - params = safe_eval('dict(' + estimator_params + ')') - except ValueError: - sys.exit("Unsupported parameter input: `%s`" % estimator_params) - estimator.set_params(**params) - if 'n_jobs' in estimator.get_params(): - estimator.set_params(n_jobs=N_JOBS) - - return estimator - - -def get_cv(cv_json): - """ Return CV splitter from Galaxy tool inputs - - Parameters - ---------- - cv_json : dict - From Galaxy tool inputs. 
-    e.g.:
-    {
-        'selected_cv': 'StratifiedKFold',
-        'n_splits': 3,
-        'shuffle': True,
-        'random_state': 0
-    }
-    """
-    cv = cv_json.pop('selected_cv')
-    if cv == 'default':
-        return cv_json['n_splits'], None
-
-    groups = cv_json.pop('groups_selector', None)
-    if groups is not None:
-        infile_g = groups['infile_g']
-        header = 'infer' if groups['header_g'] else None
-        column_option = (groups['column_selector_options_g']
-                         ['selected_column_selector_option_g'])
-        if column_option in ['by_index_number', 'all_but_by_index_number',
-                             'by_header_name', 'all_but_by_header_name']:
-            c = groups['column_selector_options_g']['col_g']
-        else:
-            c = None
-        groups = read_columns(
-            infile_g,
-            c=c,
-            c_option=column_option,
-            sep='\t',
-            header=header,
-            parse_dates=True)
-        groups = groups.ravel()
-
-    for k, v in cv_json.items():
-        if v == '':
-            cv_json[k] = None
-
-    test_fold = cv_json.get('test_fold', None)
-    if test_fold:
-        if test_fold.startswith('__ob__'):
-            test_fold = test_fold[6:]
-        if test_fold.endswith('__cb__'):
-            test_fold = test_fold[:-6]
-        cv_json['test_fold'] = [int(x.strip()) for x in test_fold.split(',')]
-
-    test_size = cv_json.get('test_size', None)
-    if test_size and test_size > 1.0:
-        cv_json['test_size'] = int(test_size)
-
-    if cv == 'OrderedKFold':
-        cv_class = try_get_attr('model_validations', 'OrderedKFold')
-    elif cv == 'RepeatedOrderedKFold':
-        cv_class = try_get_attr('model_validations', 'RepeatedOrderedKFold')
-    else:
-        cv_class = getattr(model_selection, cv)
-    splitter = cv_class(**cv_json)
-
-    return splitter, groups
-
-
-# needed when sklearn < v0.20
-def balanced_accuracy_score(y_true, y_pred):
-    """Compute balanced accuracy score, which is now available in
-    scikit-learn from v0.20.0.
-    """
-    C = metrics.confusion_matrix(y_true, y_pred)
-    with np.errstate(divide='ignore', invalid='ignore'):
-        per_class = np.diag(C) / C.sum(axis=1)
-    if np.any(np.isnan(per_class)):
-        warnings.warn('y_pred contains classes not in y_true')
-        per_class = per_class[~np.isnan(per_class)]
-    score = np.mean(per_class)
-    return score
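A sketch of what `get_scoring` (below) returns for a multi-metric request (hypothetical inputs; the optional `iraps_classifier` module is assumed to be importable, since the binarize scorers are looked up unconditionally):

    scoring_json = {'primary_scoring': 'r2',
                    'secondary_scoring': 'neg_mean_absolute_error'}
    scorers = get_scoring(scoring_json)
    # -> {'r2': <scorer>, 'neg_mean_absolute_error': <scorer>}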
k+": "+repr(v)]) - results.append( - ["", "Note:", - "@, params eligible for search in searchcv tool."]) - - return results - - -def try_get_attr(module, name): - """try to get attribute from a custom module - - Parameters - ---------- - module : str - Module name - name : str - Attribute (class/function) name. - - Returns - ------- - class or function - """ - mod = sys.modules.get(module, None) - if mod: - return getattr(mod, name) - else: - raise Exception("No module named %s." % module)