Galaxy |

Changeset 24:abb5a3f256e3 (2019-05-14)

Previous changeset 23:9d234733ccfd (2018-12-30) Next changeset 25:27903ce9b4be (2019-07-09)

Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7

modified:
main_macros.xml
numeric_clustering.xml
search_model_validation.py
test-data/mv_result02.tabular
test-data/mv_result03.tabular
test-data/mv_result05.tabular
test-data/nn_model01
test-data/pipeline01
test-data/pipeline02
test-data/pipeline03
test-data/pipeline04
test-data/pipeline05
test-data/pipeline06
test-data/pipeline07
test-data/pipeline08
test-data/pipeline09
test-data/pipeline10
test-data/pipeline11
test-data/pipeline12
test-data/searchCV01
test-data/searchCV02
utils.py

added:
feature_selectors.py
iraps_classifier.py
model_validations.py
pk_whitelist.json
preprocessors.py
stacking_ensembles.py
test-data/GridSearchCV.zip
test-data/LinearRegression01.zip
test-data/LinearRegression02.zip
test-data/RFE.zip
test-data/RandomForestClassifier.zip
test-data/RandomForestRegressor01.zip
test-data/StackingCVRegressor01.zip
test-data/StackingCVRegressor02.zip
test-data/XGBRegressor01.zip
test-data/best_estimator_.zip
test-data/best_params_.txt
test-data/best_score_.tabular
test-data/feature_importances_.tabular
test-data/feature_selection_result13
test-data/final_estimator.zip
test-data/get_params.tabular
test-data/get_params01.tabular
test-data/get_params02.tabular
test-data/get_params03.tabular
test-data/get_params04.tabular
test-data/get_params05.tabular
test-data/get_params06.tabular
test-data/get_params07.tabular
test-data/get_params08.tabular
test-data/get_params09.tabular
test-data/get_params10.tabular
test-data/get_params11.tabular
test-data/get_params12.tabular
test-data/named_steps.txt
test-data/pipeline13
test-data/pipeline14
test-data/pipeline15
test-data/ranking_.tabular

removed:
sk_whitelist.json
test-data/mv_result01.tabular
test-data/mv_result04.tabular
test-data/mv_result06.tabular

diff -r 9d234733ccfd -r abb5a3f256e3 feature_selectors.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/feature_selectors.py Tue May 14 18:11:02 2019 -0400

[

b'@@ -0,0 +1,357 @@\n+"""\n+DyRFE\n+DyRFECV\n+MyPipeline\n+MyimbPipeline\n+check_feature_importances\n+"""\n+import numpy as np\n+\n+from imblearn import under_sampling, over_sampling, combine\n+from imblearn.pipeline import Pipeline as imbPipeline\n+from sklearn import (cluster, compose, decomposition, ensemble,\n+ feature_extraction, feature_selection,\n+ gaussian_process, kernel_approximation,\n+ metrics, model_selection, naive_bayes,\n+ neighbors, pipeline, preprocessing,\n+ svm, linear_model, tree, discriminant_analysis)\n+\n+from sklearn.base import BaseEstimator\n+from sklearn.base import MetaEstimatorMixin, clone, is_classifier\n+from sklearn.feature_selection.rfe import _rfe_single_fit, RFE, RFECV\n+from sklearn.model_selection import check_cv\n+from sklearn.metrics.scorer import check_scoring\n+from sklearn.utils import check_X_y, safe_indexing, safe_sqr\n+from sklearn.utils._joblib import Parallel, delayed, effective_n_jobs\n+\n+\n+class DyRFE(RFE):\n+ """\n+ Mainly used with DyRFECV\n+\n+ Parameters\n+ ----------\n+ estimator : object\n+ A supervised learning estimator with a ``fit`` method that provides\n+ information about feature importance either through a ``coef_``\n+ attribute or through a ``feature_importances_`` attribute.\n+ n_features_to_select : int or None (default=None)\n+ The number of features to select. 
If `None`, half of the features\n+ are selected.\n+ step : int, float or list, optional (default=1)\n+ If greater than or equal to 1, then ``step`` corresponds to the\n+ (integer) number of features to remove at each iteration.\n+ If within (0.0, 1.0), then ``step`` corresponds to the percentage\n+ (rounded down) of features to remove at each iteration.\n+ If list, a series of steps of features to remove at each iteration.\n+ Iterations stops when steps finish\n+ verbose : int, (default=0)\n+ Controls verbosity of output.\n+\n+ """\n+ def __init__(self, estimator, n_features_to_select=None, step=1,\n+ verbose=0):\n+ super(DyRFE, self).__init__(estimator, n_features_to_select,\n+ step, verbose)\n+\n+ def _fit(self, X, y, step_score=None):\n+\n+ if type(self.step) is not list:\n+ return super(DyRFE, self)._fit(X, y, step_score)\n+\n+ # dynamic step\n+ X, y = check_X_y(X, y, "csc")\n+ # Initialization\n+ n_features = X.shape[1]\n+ if self.n_features_to_select is None:\n+ n_features_to_select = n_features // 2\n+ else:\n+ n_features_to_select = self.n_features_to_select\n+\n+ step = []\n+ for s in self.step:\n+ if 0.0 < s < 1.0:\n+ step.append(int(max(1, s * n_features)))\n+ else:\n+ step.append(int(s))\n+ if s <= 0:\n+ raise ValueError("Step must be >0")\n+\n+ support_ = np.ones(n_features, dtype=np.bool)\n+ ranking_ = np.ones(n_features, dtype=np.int)\n+\n+ if step_score:\n+ self.scores_ = []\n+\n+ step_i = 0\n+ # Elimination\n+ while np.sum(support_) > n_features_to_select and step_i < len(step):\n+\n+ # if last step is 1, will keep loop\n+ if step_i == len(step) - 1 and step[step_i] != 0:\n+ step.append(step[step_i])\n+\n+ # Remaining features\n+ features = np.arange(n_features)[support_]\n+\n+ # Rank the remaining features\n+ estimator = clone(self.estimator)\n+ if self.verbose > 0:\n+ print("Fitting estimator with %d features." 
% np.sum(support_))\n+\n+ estimator.fit(X[:, features], y)\n+\n+ # Get coefs\n+ if hasattr(estimator, \'coef_\'):\n+ coefs = estimator.coef_\n+ else:\n+ coefs = getattr(estimator, \'feature_importances_\', None)\n+ '..b' # Note that joblib raises a non-picklable error for bound methods\n+ # even if n_jobs is set to 1 with the default multiprocessing\n+ # backend.\n+ # This branching is done so that to\n+ # make sure that user code that sets n_jobs to 1\n+ # and provides bound methods as scorers is not broken with the\n+ # addition of n_jobs parameter in version 0.18.\n+\n+ if effective_n_jobs(self.n_jobs) == 1:\n+ parallel, func = list, _rfe_single_fit\n+ else:\n+ parallel = Parallel(n_jobs=self.n_jobs)\n+ func = delayed(_rfe_single_fit)\n+\n+ scores = parallel(\n+ func(rfe, self.estimator, X, y, train, test, scorer)\n+ for train, test in cv.split(X, y, groups))\n+\n+ scores = np.sum(scores, axis=0)\n+ diff = int(scores.shape[0]) - len(step)\n+ if diff > 0:\n+ step = np.r_[step, [step[-1]] * diff]\n+ scores_rev = scores[::-1]\n+ argmax_idx = len(scores) - np.argmax(scores_rev) - 1\n+ n_features_to_select = max(\n+ n_features - sum(step[:argmax_idx]),\n+ self.min_features_to_select)\n+\n+ # Re-execute an elimination with best_k over the whole set\n+ rfe = DyRFE(estimator=self.estimator,\n+ n_features_to_select=n_features_to_select, step=self.step,\n+ verbose=self.verbose)\n+\n+ rfe.fit(X, y)\n+\n+ # Set final attributes\n+ self.support_ = rfe.support_\n+ self.n_features_ = rfe.n_features_\n+ self.ranking_ = rfe.ranking_\n+ self.estimator_ = clone(self.estimator)\n+ self.estimator_.fit(self.transform(X), y)\n+\n+ # Fixing a normalization error, n is equal to get_n_splits(X, y) - 1\n+ # here, the scores are normalized by get_n_splits(X, y)\n+ self.grid_scores_ = scores[::-1] / cv.get_n_splits(X, y, groups)\n+ return self\n+\n+\n+class MyPipeline(pipeline.Pipeline):\n+ """\n+ Extend pipeline object to have feature_importances_ attribute\n+ """\n+ def fit(self, X, 
y=None, **fit_params):\n+ super(MyPipeline, self).fit(X, y, **fit_params)\n+ estimator = self.steps[-1][-1]\n+ if hasattr(estimator, \'coef_\'):\n+ coefs = estimator.coef_\n+ else:\n+ coefs = getattr(estimator, \'feature_importances_\', None)\n+ if coefs is None:\n+ raise RuntimeError(\'The estimator in the pipeline does not expose \'\n+ \'"coef_" or "feature_importances_" \'\n+ \'attributes\')\n+ self.feature_importances_ = coefs\n+ return self\n+\n+\n+class MyimbPipeline(imbPipeline):\n+ """\n+ Extend imblance pipeline object to have feature_importances_ attribute\n+ """\n+ def fit(self, X, y=None, **fit_params):\n+ super(MyimbPipeline, self).fit(X, y, **fit_params)\n+ estimator = self.steps[-1][-1]\n+ if hasattr(estimator, \'coef_\'):\n+ coefs = estimator.coef_\n+ else:\n+ coefs = getattr(estimator, \'feature_importances_\', None)\n+ if coefs is None:\n+ raise RuntimeError(\'The estimator in the pipeline does not expose \'\n+ \'"coef_" or "feature_importances_" \'\n+ \'attributes\')\n+ self.feature_importances_ = coefs\n+ return self\n+\n+\n+def check_feature_importances(estimator):\n+ """\n+ For pipeline object which has no feature_importances_ property,\n+ this function returns the same comfigured pipeline object with\n+ attached the last estimator\'s feature_importances_.\n+ """\n+ if estimator.__class__.__module__ == \'sklearn.pipeline\':\n+ pipeline_steps = estimator.get_params()[\'steps\']\n+ estimator = MyPipeline(pipeline_steps)\n+ elif estimator.__class__.__module__ == \'imblearn.pipeline\':\n+ pipeline_steps = estimator.get_params()[\'steps\']\n+ estimator = MyimbPipeline(pipeline_steps)\n+ else:\n+ return estimator\n'

diff -r 9d234733ccfd -r abb5a3f256e3 iraps_classifier.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/iraps_classifier.py Tue May 14 18:11:02 2019 -0400

[

b'@@ -0,0 +1,569 @@\n+"""\n+class IRAPSCore\n+class IRAPSClassifier\n+class BinarizeTargetClassifier\n+class BinarizeTargetRegressor\n+class _BinarizeTargetScorer\n+class _BinarizeTargetProbaScorer\n+\n+binarize_auc_scorer\n+binarize_average_precision_scorer\n+\n+binarize_accuracy_scorer\n+binarize_balanced_accuracy_scorer\n+binarize_precision_scorer\n+binarize_recall_scorer\n+"""\n+\n+\n+import numpy as np\n+import random\n+import warnings\n+\n+from abc import ABCMeta\n+from scipy.stats import ttest_ind\n+from sklearn import metrics\n+from sklearn.base import BaseEstimator, clone, RegressorMixin\n+from sklearn.externals import six\n+from sklearn.feature_selection.univariate_selection import _BaseFilter\n+from sklearn.metrics.scorer import _BaseScorer\n+from sklearn.pipeline import Pipeline\n+from sklearn.utils import as_float_array, check_X_y\n+from sklearn.utils._joblib import Parallel, delayed\n+from sklearn.utils.validation import (check_array, check_is_fitted,\n+ check_memory, column_or_1d)\n+\n+\n+VERSION = \'0.1.1\'\n+\n+\n+class IRAPSCore(six.with_metaclass(ABCMeta, BaseEstimator)):\n+ """\n+ Base class of IRAPSClassifier\n+ From sklearn BaseEstimator:\n+ get_params()\n+ set_params()\n+\n+ Parameters\n+ ----------\n+ n_iter : int\n+ sample count\n+\n+ positive_thres : float\n+ z_score shreshold to discretize positive target values\n+\n+ negative_thres : float\n+ z_score threshold to discretize negative target values\n+\n+ verbose : int\n+ 0 or geater, if not 0, print progress\n+\n+ n_jobs : int, default=1\n+ The number of CPUs to use to do the computation.\n+\n+ pre_dispatch : int, or string.\n+ Controls the number of jobs that get dispatched during parallel\n+ execution. Reducing this number can be useful to avoid an\n+ explosion of memory consumption when more jobs get dispatched\n+ than CPUs can process. This parameter can be:\n+ - None, in which case all the jobs are immediately\n+ created and spawned. 
Use this for lightweight and\n+ fast-running jobs, to avoid delays due to on-demand\n+ spawning of the jobs\n+ - An int, giving the exact number of total jobs that are\n+ spawned\n+ - A string, giving an expression as a function of n_jobs,\n+ as in \'2*n_jobs\'\n+\n+ random_state : int or None\n+ """\n+\n+ def __init__(self, n_iter=1000, positive_thres=-1, negative_thres=0,\n+ verbose=0, n_jobs=1, pre_dispatch=\'2*n_jobs\',\n+ random_state=None):\n+ """\n+ IRAPS turns towwards general Anomaly Detection\n+ It comapares positive_thres with negative_thres,\n+ and decide which portion is the positive target.\n+ e.g.:\n+ (positive_thres=-1, negative_thres=0)\n+ => positive = Z_score of target < -1\n+ (positive_thres=1, negative_thres=0)\n+ => positive = Z_score of target > 1\n+\n+ Note: The positive targets here is always the\n+ abnormal minority group.\n+ """\n+ self.n_iter = n_iter\n+ self.positive_thres = positive_thres\n+ self.negative_thres = negative_thres\n+ self.verbose = verbose\n+ self.n_jobs = n_jobs\n+ self.pre_dispatch = pre_dispatch\n+ self.random_state = random_state\n+\n+ def fit(self, X, y):\n+ """\n+ X: array-like (n_samples x n_features)\n+ y: 1-d array-like (n_samples)\n+ """\n+ X, y = check_X_y(X, y, [\'csr\', \'csc\'], multi_output=False)\n+\n+ def _stochastic_sampling(X, y, random_state=None, positive_thres=-1,\n+ negative_thres=0):\n+ # each iteration select a random number of random subset of\n+ # training samples. 
this is somewhat different from the original\n+ # IRAPS method, but effect is almost the same.\n+ SAMPLE_SIZE = [0.25, 0.75]\n+ n_samples = X.shape[0'..b'lue = main_estimator.discretize_value\n+ less_is_positive = main_estimator.less_is_positive\n+\n+ if less_is_positive:\n+ y_trans = y < discretize_value\n+ else:\n+ y_trans = y > discretize_value\n+\n+ y_pred = clf.predict(X)\n+ if sample_weight is not None:\n+ return self._sign * self._score_func(y_trans, y_pred,\n+ sample_weight=sample_weight,\n+ **self._kwargs)\n+ else:\n+ return self._sign * self._score_func(y_trans, y_pred,\n+ **self._kwargs)\n+\n+\n+# roc_auc\n+binarize_auc_scorer =\\\n+ _BinarizeTargetProbaScorer(metrics.roc_auc_score, 1, {})\n+\n+# average_precision_scorer\n+binarize_average_precision_scorer =\\\n+ _BinarizeTargetProbaScorer(metrics.average_precision_score, 1, {})\n+\n+# roc_auc_scorer\n+iraps_auc_scorer = binarize_auc_scorer\n+\n+# average_precision_scorer\n+iraps_average_precision_scorer = binarize_average_precision_scorer\n+\n+\n+class BinarizeTargetRegressor(BaseEstimator, RegressorMixin):\n+ """\n+ Extend regression estimator to have discretize_value\n+\n+ Parameters\n+ ----------\n+ regressor: object\n+ Estimator object such as derived from sklearn `RegressionMixin`.\n+\n+ z_score: float, default=-1.0\n+ Threshold value based on z_score. 
Will be ignored when\n+ fixed_value is set\n+\n+ value: float, default=None\n+ Threshold value\n+\n+ less_is_positive: boolean, default=True\n+ When target is less the threshold value, it will be converted\n+ to True, False otherwise.\n+\n+ Attributes\n+ ----------\n+ regressor_: object\n+ Fitted regressor\n+\n+ discretize_value: float\n+ The threshold value used to discretize True and False targets\n+ """\n+\n+ def __init__(self, regressor, z_score=-1, value=None,\n+ less_is_positive=True):\n+ self.regressor = regressor\n+ self.z_score = z_score\n+ self.value = value\n+ self.less_is_positive = less_is_positive\n+\n+ def fit(self, X, y, sample_weight=None):\n+ """\n+ Calculate the discretize_value fit the regressor with traning data\n+\n+ Returns\n+ ------\n+ self: object\n+ """\n+ y = check_array(y, accept_sparse=False, force_all_finite=True,\n+ ensure_2d=False, dtype=\'numeric\')\n+ y = column_or_1d(y)\n+\n+ if self.value is None:\n+ discretize_value = y.mean() + y.std() * self.z_score\n+ else:\n+ discretize_value = self.Value\n+ self.discretize_value = discretize_value\n+\n+ self.regressor_ = clone(self.regressor)\n+\n+ if sample_weight is not None:\n+ self.regressor_.fit(X, y, sample_weight=sample_weight)\n+ else:\n+ self.regressor_.fit(X, y)\n+\n+ # attach classifier attributes\n+ if hasattr(self.regressor_, \'feature_importances_\'):\n+ self.feature_importances_ = self.regressor_.feature_importances_\n+ if hasattr(self.regressor_, \'coef_\'):\n+ self.coef_ = self.regressor_.coef_\n+ if hasattr(self.regressor_, \'n_outputs_\'):\n+ self.n_outputs_ = self.regressor_.n_outputs_\n+ if hasattr(self.regressor_, \'n_features_\'):\n+ self.n_features_ = self.regressor_.n_features_\n+\n+ return self\n+\n+ def predict(self, X):\n+ """Predict target value of X\n+ """\n+ check_is_fitted(self, \'regressor_\')\n+ y_pred = self.regressor_.predict(X)\n+ if not np.all((y_pred >= 0) & (y_pred <= 1)):\n+ y_pred = (y_pred - y_pred.min()) / (y_pred.max() - y_pred.min())\n+ if 
self.less_is_positive:\n+ y_pred = 1 - y_pred\n+ return y_pred\n+\n+\n+# roc_auc_scorer\n+regression_auc_scorer = binarize_auc_scorer\n+\n+# average_precision_scorer\n+regression_average_precision_scorer = binarize_average_precision_scorer\n'

diff -r 9d234733ccfd -r abb5a3f256e3 main_macros.xml
--- a/main_macros.xml Sun Dec 30 01:55:30 2018 -0500
+++ b/main_macros.xml Tue May 14 18:11:02 2019 -0400

[

b'@@ -1,14 +1,17 @@\n <macros>\n- <token name="@VERSION@">1.0</token>\n+ <token name="@VERSION@">1.0.0.4</token>\n \n <xml name="python_requirements">\n <requirements>\n <requirement type="package" version="3.6">python</requirement>\n- <requirement type="package" version="0.20.2">scikit-learn</requirement>\n- <requirement type="package" version="0.23.4">pandas</requirement>\n+ <requirement type="package" version="0.20.3">scikit-learn</requirement>\n+ <requirement type="package" version="0.24.2">pandas</requirement>\n <requirement type="package" version="0.80">xgboost</requirement>\n <requirement type="package" version="0.9.13">asteval</requirement>\n- <yield />\n+ <requirement type="package" version="0.6">skrebate</requirement>\n+ <requirement type="package" version="0.4.2">imbalanced-learn</requirement>\n+ <requirement type="package" version="0.16.0">mlxtend</requirement>\n+ <yield/>\n </requirements>\n </xml>\n \n@@ -352,10 +355,10 @@\n <option value="all_columns">All columns</option>\n </param>\n <when value="by_index_number">\n- <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>\n+ <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):"/>\n </when>\n <when value="all_but_by_index_number">\n- <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>\n+ <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):"/>\n </when>\n <when value="by_header_name">\n <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. 
For example: target1,target2"/>\n@@ -428,7 +431,7 @@\n <option value="sparse">sparse matrix</option>\n </param>\n <when value="tabular">\n- <expand macro="samples_tabular" multiple1="true"/>\n+ <expand macro="samples_tabular" multiple1="true" multiple2="false"/>\n </when>\n <when value="sparse">\n <expand macro="sparse_target"/>\n@@ -823,6 +826,8 @@\n <option value="StratifiedShuffleSplit">StratifiedShuffleSplit</option>\n <option value="TimeSeriesSplit">TimeSeriesSplit</option>\n <option value="PredefinedSplit">PredefinedSplit</option>\n+ <option value="OrderedKFold">OrderedKFold</option>\n+ <option value="RepeatedOrderedKFold">RepeatedOrderedKFold</option>\n <yield/>\n </xml>\n \n@@ -872,6 +877,16 @@\n <when value="PredefinedSplit">\n <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to \'-1\'."/>\n </when>\n+ <when value="OrderedKFold">\n+ <expand macro="cv_n_splits"/>\n+ <expand macro="cv_shuffle"/>\n+ <expand macro="random_state"/>\n+ </when>\n+ <when value="RepeatedOrderedKFold">\n+ <expand macro="cv_n_splits"/>\n+ <param argument="n_repeats" type="integer" value="5"/>\n+ <expand macro="random_state"/>\n+ </when>\n <yield/>\n </xml>\n \n@@ -929,7 +944,13 @@\n </xml>\n \n <xml name="cv_groups" >\n- <param argument="groups" type="text" value="" area="true" label="Groups" help="Group lables in a list. 
e.g., [1, 1, 2, 2, 3, 3, 3]"/>\n+ <section name="groups_selector" title="Groups column selector" expanded="true">\n+ <param name="infile_g" type="data" format="tabular" label="Choose dataset containing groups info:"/>\n+ <param name="header_g" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />\n+ <conditional name="column_selector_options_g">\n+ <expand macro="sa'..b' </sanitizer>\n- </param>\n- </xml>\n-\n <xml name="search_cv_options">\n <expand macro="scoring_selection"/>\n <expand macro="model_validation_common_options"/>\n- <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/>\n+ \n <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/>\n <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/>\n <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/>\n@@ -1403,12 +1454,12 @@\n <conditional name="estimator_selector">\n <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >\n <expand macro="estimator_module_options">\n- <option value="customer_estimator">Load a customer estimator</option>\n+ <option value="custom_estimator">Load a custom estimator</option>\n </expand>\n </param>\n <expand macro="estimator_suboptions">\n- <when value="customer_estimator">\n- <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the customer estimator or pipeline:"/>\n+ <when 
value="custom_estimator">\n+ <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline:"/>\n </when>\n </expand>\n </conditional>\n@@ -1591,6 +1642,7 @@\n <option value="over_sampling.SMOTENC">over_sampling.SMOTENC</option>\n <option value="combine.SMOTEENN">combine.SMOTEENN</option>\n <option value="combine.SMOTETomek">combine.SMOTETomek</option>\n+ <option value="Z_RandomOverSampler">Z_RandomOverSampler - for regression</option>\n </param>\n <when value="under_sampling.ClusterCentroids">\n <expand macro="estimator_params_text"\n@@ -1668,6 +1720,33 @@\n <expand macro="estimator_params_text"\n help="Default(=blank): sampling_strategy=\'auto\', random_state=None, smote=None, tomek=None."/>\n </when>\n+ <when value="Z_RandomOverSampler">\n+ <expand macro="estimator_params_text"\n+ help="Default(=blank): sampling_strategy=\'auto\', random_state=None, negative_thres=0, positive_thres=-1."/>\n+ </when>\n+ </conditional>\n+ </xml>\n+\n+ <xml name="stacking_ensemble_inputs">\n+ <section name="options" title="Advanced Options" expanded="false">\n+ <yield/>\n+ <param argument="use_features_in_secondary" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/>\n+ <param argument="store_train_meta_features" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/>\n+ </section>\n+ </xml>\n+\n+ <xml name="stacking_base_estimator">\n+ <conditional name="estimator_selector">\n+ <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >\n+ <expand macro="estimator_module_options">\n+ <option value="custom_estimator">Load a custom estimator</option>\n+ </expand>\n+ </param>\n+ <expand macro="estimator_suboptions">\n+ <when value="custom_estimator">\n+ <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline"/>\n+ </when>\n+ </expand>\n </conditional>\n </xml>\n \n'

diff -r 9d234733ccfd -r abb5a3f256e3 model_validations.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/model_validations.py Tue May 14 18:11:02 2019 -0400

[

b'@@ -0,0 +1,252 @@\n+"""\n+class\n+-----\n+OrderedKFold\n+RepeatedOrderedKold\n+\n+\n+function\n+--------\n+train_test_split\n+"""\n+\n+import numpy as np\n+import warnings\n+\n+from itertools import chain\n+from math import ceil, floor\n+from sklearn.model_selection import (GroupShuffleSplit, ShuffleSplit,\n+ StratifiedShuffleSplit)\n+from sklearn.model_selection._split import _BaseKFold, _RepeatedSplits\n+from sklearn.utils import check_random_state, indexable, safe_indexing\n+from sklearn.utils.validation import _num_samples, check_array\n+\n+\n+def _validate_shuffle_split(n_samples, test_size, train_size,\n+ default_test_size=None):\n+ """\n+ Validation helper to check if the test/test sizes are meaningful wrt to the\n+ size of the data (n_samples)\n+ """\n+ if test_size is None and train_size is None:\n+ test_size = default_test_size\n+\n+ test_size_type = np.asarray(test_size).dtype.kind\n+ train_size_type = np.asarray(train_size).dtype.kind\n+\n+ if (test_size_type == \'i\' and (test_size >= n_samples or test_size <= 0)\n+ or test_size_type == \'f\' and (test_size <= 0 or test_size >= 1)):\n+ raise ValueError(\'test_size={0} should be either positive and smaller\'\n+ \' than the number of samples {1} or a float in the \'\n+ \'(0, 1) range\'.format(test_size, n_samples))\n+\n+ if (train_size_type == \'i\' and (train_size >= n_samples or train_size <= 0)\n+ or train_size_type == \'f\' and (train_size <= 0 or train_size >= 1)):\n+ raise ValueError(\'train_size={0} should be either positive and smaller\'\n+ \' than the number of samples {1} or a float in the \'\n+ \'(0, 1) range\'.format(train_size, n_samples))\n+\n+ if train_size is not None and train_size_type not in (\'i\', \'f\'):\n+ raise ValueError("Invalid value for train_size: {}".format(train_size))\n+ if test_size is not None and test_size_type not in (\'i\', \'f\'):\n+ raise ValueError("Invalid value for test_size: {}".format(test_size))\n+\n+ if (train_size_type == \'f\' and test_size_type == \'f\' 
and\n+ train_size + test_size > 1):\n+ raise ValueError(\n+ \'The sum of test_size and train_size = {}, should be in the (0, 1)\'\n+ \' range. Reduce test_size and/or train_size.\'\n+ .format(train_size + test_size))\n+\n+ if test_size_type == \'f\':\n+ n_test = ceil(test_size * n_samples)\n+ elif test_size_type == \'i\':\n+ n_test = float(test_size)\n+\n+ if train_size_type == \'f\':\n+ n_train = floor(train_size * n_samples)\n+ elif train_size_type == \'i\':\n+ n_train = float(train_size)\n+\n+ if train_size is None:\n+ n_train = n_samples - n_test\n+ elif test_size is None:\n+ n_test = n_samples - n_train\n+\n+ if n_train + n_test > n_samples:\n+ raise ValueError(\'The sum of train_size and test_size = %d, \'\n+ \'should be smaller than the number of \'\n+ \'samples %d. Reduce test_size and/or \'\n+ \'train_size.\' % (n_train + n_test, n_samples))\n+\n+ n_train, n_test = int(n_train), int(n_test)\n+\n+ if n_train == 0:\n+ raise ValueError(\n+ \'With n_samples={}, test_size={} and train_size={}, the \'\n+ \'resulting train set will be empty. 
Adjust any of the \'\n+ \'aforementioned parameters.\'.format(n_samples, test_size,\n+ train_size)\n+ )\n+\n+ return n_train, n_test\n+\n+\n+def train_test_split(*arrays, **options):\n+ """Extend sklearn.model_selection.train_test_slit to have group split.\n+\n+ Parameters\n+ ----------\n+ *arrays : sequence of indexables with same length / shape[0]\n+ Allowed inputs are lists, numpy arrays, scipy-sparse\n+ matrices or pandas dataframes.\n+\n+ test_size : float, int or None, optional (default=None)\n+ If float, should be betw'..b'arrays == 0:\n+ raise ValueError("At least one array required as input")\n+ test_size = options.pop(\'test_size\', None)\n+ train_size = options.pop(\'train_size\', None)\n+ random_state = options.pop(\'random_state\', None)\n+ shuffle = options.pop(\'shuffle\', \'simple\')\n+ labels = options.pop(\'labels\', None)\n+\n+ if options:\n+ raise TypeError("Invalid parameters passed: %s" % str(options))\n+\n+ arrays = indexable(*arrays)\n+\n+ n_samples = _num_samples(arrays[0])\n+ if shuffle == \'group\':\n+ if labels is None:\n+ raise ValueError("When shuffle=\'group\', "\n+ "labels should not be None!")\n+ labels = check_array(labels, ensure_2d=False, dtype=None)\n+ uniques = np.unique(labels)\n+ n_samples = uniques.size\n+\n+ n_train, n_test = _validate_shuffle_split(n_samples, test_size, train_size,\n+ default_test_size=0.25)\n+\n+ shuffle_options = dict(test_size=n_test,\n+ train_size=n_train,\n+ random_state=random_state)\n+\n+ if shuffle is None:\n+ if labels is not None:\n+ warnings.warn("The `labels` is ignored for "\n+ "shuffle being None!")\n+\n+ train = np.arange(n_train)\n+ test = np.arange(n_train, n_train + n_test)\n+\n+ elif shuffle == \'simple\':\n+ if labels is not None:\n+ warnings.warn("The `labels` is not needed and therefore "\n+ "ignored for ShuffleSplit, as shuffle=\'simple\'!")\n+\n+ cv = ShuffleSplit(**shuffle_options)\n+ train, test = next(cv.split(X=arrays[0], y=None))\n+\n+ elif shuffle == \'stratified\':\n+ cv = 
StratifiedShuffleSplit(**shuffle_options)\n+ train, test = next(cv.split(X=arrays[0], y=labels))\n+\n+ elif shuffle == \'group\':\n+ cv = GroupShuffleSplit(**shuffle_options)\n+ train, test = next(cv.split(X=arrays[0], y=None, groups=labels))\n+\n+ else:\n+ raise ValueError("The argument `shuffle` only supports None, "\n+ "\'simple\', \'stratified\' and \'group\', but got `%s`!"\n+ % shuffle)\n+\n+ return list(chain.from_iterable((safe_indexing(a, train),\n+ safe_indexing(a, test)) for a in arrays))\n+\n+\n+class OrderedKFold(_BaseKFold):\n+ """\n+ Split into K fold based on ordered target value\n+\n+ Parameters\n+ ----------\n+ n_splits : int, default=3\n+ Number of folds. Must be at least 2.\n+ shuffle: bool\n+ random_state: None or int\n+ """\n+\n+ def __init__(self, n_splits=3, shuffle=False, random_state=None):\n+ super(OrderedKFold, self).__init__(n_splits, shuffle, random_state)\n+\n+ def _iter_test_indices(self, X, y, groups=None):\n+ n_samples = _num_samples(X)\n+ n_splits = self.n_splits\n+ y = np.asarray(y)\n+ sorted_index = np.argsort(y)\n+ if self.shuffle:\n+ current = 0\n+ rng = check_random_state(self.random_state)\n+ for i in range(n_samples // int(n_splits)):\n+ start, stop = current, current + n_splits\n+ rng.shuffle(sorted_index[start:stop])\n+ current = stop\n+ rng.shuffle(sorted_index[current:])\n+\n+ for i in range(n_splits):\n+ yield sorted_index[i:n_samples:n_splits]\n+\n+\n+class RepeatedOrderedKFold(_RepeatedSplits):\n+ """ Repeated OrderedKFold runs mutiple times with different randomization.\n+\n+ Parameters\n+ ----------\n+ n_splits : int, default=5\n+ Number of folds. Must be at least 2.\n+\n+ n_repeats : int, default=5\n+ Number of times cross-validator to be repeated.\n+\n+ random_state: int, RandomState instance or None. Optional\n+ """\n+ def __init__(self, n_splits=5, n_repeats=5, random_state=None):\n+ super(RepeatedOrderedKFold, self).__init__(\n+ OrderedKFold, n_repeats, random_state, n_splits=n_splits)\n'

diff -r 9d234733ccfd -r abb5a3f256e3 numeric_clustering.xml
--- a/numeric_clustering.xml Sun Dec 30 01:55:30 2018 -0500
+++ b/numeric_clustering.xml Tue May 14 18:11:02 2019 -0400

[

@@ -16,12 +16,16 @@
<![CDATA[
import sys
import json
+import numpy as np
import sklearn.cluster
import pandas
from sklearn import metrics
from scipy.io import mmread

-exec(open("$__tool_directory__/utils.py").read(), globals())
+sys.path.insert(0, '$__tool_directory__')
+from utils import read_columns
+
+N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))

input_json_path = sys.argv[1]
with open(input_json_path, "r") as param_handler:
@@ -56,8 +60,7 @@
     header=header,
     parse_dates=True,
     encoding=None,
-    tupleize_cols=False
-)
+    tupleize_cols=False)
#end if

prediction = cluster_object.fit_predict( data_matrix )

diff -r 9d234733ccfd -r abb5a3f256e3 pk_whitelist.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pk_whitelist.json Tue May 14 18:11:02 2019 -0400

[

b'@@ -0,0 +1,768 @@\n+{ "SK_NAMES": [\n+ "sklearn._ASSUME_FINITE", "sklearn._isotonic._inplace_contiguous_isotonic_regression",\n+ "sklearn._isotonic._make_unique", "sklearn.base.BaseEstimator",\n+ "sklearn.base.BiclusterMixin", "sklearn.base.ClassifierMixin",\n+ "sklearn.base.ClusterMixin", "sklearn.base.DensityMixin",\n+ "sklearn.base.MetaEstimatorMixin", "sklearn.base.RegressorMixin",\n+ "sklearn.base.TransformerMixin", "sklearn.base._first_and_last_element",\n+ "sklearn.base._pprint", "sklearn.base.clone",\n+ "sklearn.base.is_classifier", "sklearn.base.is_regressor",\n+ "sklearn.clone", "sklearn.cluster.AffinityPropagation",\n+ "sklearn.cluster.AgglomerativeClustering", "sklearn.cluster.Birch",\n+ "sklearn.cluster.DBSCAN", "sklearn.cluster.FeatureAgglomeration",\n+ "sklearn.cluster.KMeans", "sklearn.cluster.MeanShift",\n+ "sklearn.cluster.MiniBatchKMeans", "sklearn.cluster.SpectralBiclustering",\n+ "sklearn.cluster.SpectralClustering", "sklearn.cluster.SpectralCoclustering",\n+ "sklearn.cluster._dbscan_inner.dbscan_inner", "sklearn.cluster._feature_agglomeration.AgglomerationTransform",\n+ "sklearn.cluster._hierarchical.WeightedEdge", "sklearn.cluster._hierarchical._get_parents",\n+ "sklearn.cluster._hierarchical._hc_get_descendent", "sklearn.cluster._hierarchical.average_merge",\n+ "sklearn.cluster._hierarchical.compute_ward_dist", "sklearn.cluster._hierarchical.hc_get_heads",\n+ "sklearn.cluster._hierarchical.max_merge", "sklearn.cluster._k_means._assign_labels_array",\n+ "sklearn.cluster._k_means._assign_labels_csr", "sklearn.cluster._k_means._centers_dense",\n+ "sklearn.cluster._k_means._centers_sparse", "sklearn.cluster._k_means._mini_batch_update_csr",\n+ "sklearn.cluster._k_means_elkan.k_means_elkan", "sklearn.cluster.affinity_propagation",\n+ "sklearn.cluster.affinity_propagation_.AffinityPropagation", "sklearn.cluster.affinity_propagation_.affinity_propagation",\n+ "sklearn.cluster.bicluster.BaseSpectral", 
"sklearn.cluster.bicluster.SpectralBiclustering",\n+ "sklearn.cluster.bicluster.SpectralCoclustering", "sklearn.cluster.bicluster._bistochastic_normalize",\n+ "sklearn.cluster.bicluster._log_normalize", "sklearn.cluster.bicluster._scale_normalize",\n+ "sklearn.cluster.birch.Birch", "sklearn.cluster.birch._CFNode",\n+ "sklearn.cluster.birch._CFSubcluster", "sklearn.cluster.birch._iterate_sparse_X",\n+ "sklearn.cluster.birch._split_node", "sklearn.cluster.dbscan",\n+ "sklearn.cluster.dbscan_.DBSCAN", "sklearn.cluster.dbscan_.dbscan",\n+ "sklearn.cluster.estimate_bandwidth", "sklearn.cluster.get_bin_seeds",\n+ "sklearn.cluster.hierarchical.AgglomerativeClustering", "sklearn.cluster.hierarchical.FeatureAgglomeration",\n+ "sklearn.cluster.hierarchical._TREE_BUILDERS", "sklearn.cluster.hierarchical._average_linkage",\n+ "sklearn.cluster.hierarchical._complete_linkage", "sklearn.cluster.hierarchical._fix_connectivity",\n+ "sklearn.cluster.hierarchical._hc_cut", "sklearn.cluster.hierarchical.linkage_tree",\n+ "sklearn.cluster.hierarchical.ward_tree", "sklearn.cluster.k_means",\n+ "sklearn.cluster.k_means_.FLOAT_DTYPES", "sklearn.cluster.k_means_.KMeans",\n+ "sklearn.cluster.k_means_.MiniBatchKMeans", "sklearn.cluster.k_means_._init_centroids",\n+ "sklearn.cluster.k_means_._k_init", "sklearn.cluster.k_means_._kmeans_single_elkan",\n+ "sklearn.cluster.k_means_._kmeans_single_lloyd", "sklearn.cluster.k_means_._labels_inertia",\n+ "sklearn.cluster.k_means_._labels_inertia_precompute_dense", "sklearn.cluster.k_means_._mini_batch_convergence",\n+ "sklearn.cluster.k_means_._mini_batch_step", "sklearn.cluster.k_means_._tolerance",\n+ "sklearn.cluster.k_means_._validate_center_shape", "sklearn.cluster.k_means_.k_means",\n+ "sklearn.cluster.k_means_.string_types", "sklearn.cluster.linkage_tree",\n+ "sklearn.cluster.mean_shift", "sklearn.cluster.mean_shift_.MeanShift",\n+ "sklearn.cluster.mean_shift_._mean_shift_single_seed", "sklearn.cluster'..b'ltiSURFstar",\n+ "skrebate.ReliefF", 
"skrebate.SURF",\n+ "skrebate.SURFstar", "skrebate.TuRF",\n+ "skrebate.multisurf.MultiSURF", "skrebate.multisurfstar.MultiSURFstar",\n+ "skrebate.relieff.ReliefF", "skrebate.scoring_utils.MultiSURF_compute_scores",\n+ "skrebate.scoring_utils.MultiSURFstar_compute_scores", "skrebate.scoring_utils.ReliefF_compute_scores",\n+ "skrebate.scoring_utils.SURF_compute_scores", "skrebate.scoring_utils.SURFstar_compute_scores",\n+ "skrebate.scoring_utils.compute_score", "skrebate.scoring_utils.get_row_missing",\n+ "skrebate.scoring_utils.ramp_function", "skrebate.surf.SURF",\n+ "skrebate.surfstar.SURFstar", "skrebate.turf.TuRF"\n+ ],\n+\n+ "XGB_NAMES": [\n+ "xgboost.Booster", "xgboost.DMatrix",\n+ "xgboost.VERSION_FILE", "xgboost.XGBClassifier",\n+ "xgboost.XGBModel", "xgboost.XGBRegressor",\n+ "xgboost.callback._fmt_metric", "xgboost.callback._get_callback_context",\n+ "xgboost.callback.early_stop", "xgboost.callback.print_evaluation",\n+ "xgboost.callback.record_evaluation", "xgboost.callback.reset_learning_rate",\n+ "xgboost.compat.PANDAS_INSTALLED", "xgboost.compat.PY3",\n+ "xgboost.compat.SKLEARN_INSTALLED", "xgboost.compat.STRING_TYPES",\n+ "xgboost.compat.py_str", "xgboost.core.Booster",\n+ "xgboost.core.CallbackEnv", "xgboost.core.DMatrix",\n+ "xgboost.core.EarlyStopException", "xgboost.core.PANDAS_DTYPE_MAPPER",\n+ "xgboost.core.PANDAS_INSTALLED", "xgboost.core.PY3",\n+ "xgboost.core.STRING_TYPES", "xgboost.core.XGBoostError",\n+ "xgboost.core._check_call", "xgboost.core._load_lib",\n+ "xgboost.core._maybe_pandas_data", "xgboost.core._maybe_pandas_label",\n+ "xgboost.core.c_array", "xgboost.core.c_str",\n+ "xgboost.core.ctypes2buffer", "xgboost.core.ctypes2numpy",\n+ "xgboost.core.from_cstr_to_pystr", "xgboost.core.from_pystr_to_cstr",\n+ "xgboost.cv", "xgboost.f",\n+ "xgboost.libpath.XGBoostLibraryNotFound", "xgboost.libpath.find_lib_path",\n+ "xgboost.plot_importance", "xgboost.plot_tree",\n+ "xgboost.plotting._EDGEPAT", "xgboost.plotting._EDGEPAT2",\n+ 
"xgboost.plotting._LEAFPAT", "xgboost.plotting._NODEPAT",\n+ "xgboost.plotting._parse_edge", "xgboost.plotting._parse_node",\n+ "xgboost.plotting.plot_importance", "xgboost.plotting.plot_tree",\n+ "xgboost.plotting.to_graphviz", "xgboost.rabit.DTYPE_ENUM__",\n+ "xgboost.rabit.STRING_TYPES", "xgboost.rabit._init_rabit",\n+ "xgboost.rabit.allreduce", "xgboost.rabit.broadcast",\n+ "xgboost.rabit.finalize", "xgboost.rabit.get_processor_name",\n+ "xgboost.rabit.get_rank", "xgboost.rabit.get_world_size",\n+ "xgboost.rabit.init", "xgboost.rabit.tracker_print",\n+ "xgboost.rabit.version_number", "xgboost.sklearn.SKLEARN_INSTALLED",\n+ "xgboost.sklearn.XGBClassifier", "xgboost.sklearn.XGBModel",\n+ "xgboost.sklearn.XGBRegressor", "xgboost.sklearn._objective_decorator",\n+ "xgboost.to_graphviz", "xgboost.train",\n+ "xgboost.training.CVPack", "xgboost.training.SKLEARN_INSTALLED",\n+ "xgboost.training.STRING_TYPES", "xgboost.training._train_internal",\n+ "xgboost.training.aggcv", "xgboost.training.cv",\n+ "xgboost.training.mknfold", "xgboost.training.train"\n+ ],\n+\n+\n+ "NUMPY_NAMES": [\n+ "numpy.core.multiarray._reconstruct", "numpy.ndarray",\n+ "numpy.dtype", "numpy.core.multiarray.scalar", "numpy.random.__RandomState_ctor",\n+ "numpy.ma.core._mareconstruct", "numpy.ma.core.MaskedArray"\n+ ],\n+\n+ "IMBLEARN_NAMES":[\n+ "imblearn.pipeline.Pipeline", "imblearn.over_sampling._random_over_sampler.RandomOverSampler",\n+ "imblearn.under_sampling._prototype_selection._edited_nearest_neighbours.EditedNearestNeighbours"\n+ ],\n+\n+ "MLXTEND_NAMES":[\n+ "mlxtend.classifier.stacking_cv_classification.StackingCVClassifier",\n+ "mlxtend.classifier.stacking_classification.StackingClassifier",\n+ "mlxtend.regressor.stacking_cv_regression.StackingCVRegressor",\n+ "mlxtend.regressor.stacking_regression.StackingRegressor"\n+ ]\n+}\n\\ No newline at end of file\n'

diff -r 9d234733ccfd -r abb5a3f256e3 preprocessors.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocessors.py Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,184 @@
+"""
+Z_RandomOverSampler
+"""
+
+import imblearn
+import numpy as np
+
+from collections import Counter
+from imblearn.over_sampling.base import BaseOverSampler
+from imblearn.over_sampling import RandomOverSampler
+from imblearn.pipeline import Pipeline as imbPipeline
+from imblearn.utils import check_target_type
+from scipy import sparse
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.preprocessing.data import _handle_zeros_in_scale
+from sklearn.utils import check_array, safe_indexing
+from sklearn.utils.fixes import nanpercentile
+from sklearn.utils.validation import (check_is_fitted, check_X_y,
+                                      FLOAT_DTYPES)
+
+
+class Z_RandomOverSampler(BaseOverSampler):
+    """
+    Random over-sampler for continuous targets, driven by z-scores
+
+    The target `y` is standardized and converted into temporary labels:
+    samples with z-score > `negative_thres` get label 0, samples with
+    z-score <= `positive_thres` get label 1 and all remaining samples get
+    label -1. A `RandomOverSampler` is fitted on these labels and the
+    indices it selects are used to resample the original `X` and `y`.
+
+    Parameters
+    ----------
+    sampling_strategy, ratio :
+        Forwarded to `BaseOverSampler` and to `RandomOverSampler`.
+    return_indices : bool, default=False
+        If True, `_fit_resample` also returns the selected indices.
+    random_state :
+        Forwarded to `RandomOverSampler`.
+    negative_thres : float, default=0
+        Samples whose z-score is above this value are labelled 0.
+    positive_thres : float, default=-1
+        Samples whose z-score is at or below this value are labelled 1.
+    """
+
+    def __init__(self, sampling_strategy='auto',
+                 return_indices=False,
+                 random_state=None,
+                 ratio=None,
+                 negative_thres=0,
+                 positive_thres=-1):
+        super(Z_RandomOverSampler, self).__init__(
+            sampling_strategy=sampling_strategy, ratio=ratio)
+        self.random_state = random_state
+        self.return_indices = return_indices
+        self.negative_thres = negative_thres
+        self.positive_thres = positive_thres
+
+    @staticmethod
+    def _check_X_y(X, y):
+        """
+        Validate `X` and `y`; `dtype=None` keeps the input dtype as is
+
+        Returns
+        -------
+        X, y, binarize_y
+        """
+        y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'], dtype=None)
+        return X, y, binarize_y
+
+    def _fit_resample(self, X, y):
+        """
+        Resample `X` and `y` using labels derived from z-scores of `y`
+
+        Returns
+        -------
+        X_resampled, y_resampled[, sample_indices]
+            The indices are only returned when `return_indices` is True.
+        """
+        # convert y to z_score
+        y_z = (y - y.mean()) / y.std()
+
+        # Boolean masks replace the former per-element `x not in array`
+        # membership tests, which were quadratic in the number of samples.
+        negative_mask = y_z > self.negative_thres
+        positive_mask = y_z <= self.positive_thres
+        unclassified_mask = ~(negative_mask | positive_mask)
+
+        # Same assignment order as before: if the thresholds overlap,
+        # the positive label overrides the negative one.
+        y_z[negative_mask] = 0
+        y_z[positive_mask] = 1
+        y_z[unclassified_mask] = -1
+
+        ros = RandomOverSampler(
+            sampling_strategy=self.sampling_strategy,
+            random_state=self.random_state,
+            ratio=self.ratio)
+        # only the selected indices are needed, not the resampled labels
+        _, _ = ros.fit_resample(X, y_z)
+        sample_indices = ros.sample_indices_
+
+        print("Before sampler: %s. Total after: %s"
+              % (Counter(y_z), sample_indices.shape))
+
+        self.sample_indices_ = np.array(sample_indices)
+
+        if self.return_indices:
+            return (safe_indexing(X, sample_indices),
+                    safe_indexing(y, sample_indices),
+                    sample_indices)
+        return (safe_indexing(X, sample_indices),
+                safe_indexing(y, sample_indices))
+
+
+def _get_quantiles(X, quantile_range):
+    """
+    Compute the requested percentiles of every column of a 2d array
+
+    Parameters
+    ----------
+    X : array-like, shape [n_samples, n_features]
+    quantile_range : percentile(s) handed over to `nanpercentile`
+
+    Returns
+    -------
+    quantiles : transposed stack of the per-column percentiles
+    """
+    is_sparse = sparse.issparse(X)
+
+    def _column(col_idx):
+        # dense input: plain column slice
+        if not is_sparse:
+            return X[:, col_idx]
+        # NOTE(review): slicing `data` through `indptr` per feature
+        # presumably expects a CSC matrix - confirm against callers.
+        start, stop = X.indptr[col_idx], X.indptr[col_idx + 1]
+        stored = X.data[start:stop]
+        # stored values first, the rest of the column stays zero
+        padded = np.zeros(shape=X.shape[0], dtype=X.dtype)
+        padded[:len(stored)] = stored
+        return padded
+
+    per_column = [nanpercentile(_column(col_idx), quantile_range)
+                  for col_idx in range(X.shape[1])]
+
+    return np.transpose(per_column)
+
+
+class TDMScaler(BaseEstimator, TransformerMixin):
+    """
+    Scale features using Training Distribution Matching (TDM) algorithm
+
+    `fit` records the lower and upper percentiles, their range and the
+    minimum and maximum of the training data. `transform` clips new data
+    to bounds derived from its own percentiles and the training ratios,
+    then rescales it linearly into the training [min_, max_] range.
+
+    Parameters
+    ----------
+    q_lower : float, default=25.0
+        Lower percentile; 0 <= q_lower <= q_upper is required.
+    q_upper : float, default=75.0
+        Upper percentile; q_lower <= q_upper <= 100 is required.
+
+    References
+    ----------
+    .. [1] Thompson JA, Tan J and Greene CS (2016) Cross-platform
+           normalization of microarray and RNA-seq data for machine
+           learning applications. PeerJ 4, e1621.
+    """
+
+    def __init__(self, q_lower=25.0, q_upper=75.0):
+        self.q_lower = q_lower
+        self.q_upper = q_upper
+
+    def fit(self, X, y=None):
+        """
+        Learn the percentiles and the value range of the training data
+
+        Parameters
+        ----------
+        X : array-like, shape [n_samples, n_features]
+        y : ignored
+
+        Returns
+        -------
+        self
+
+        Raises
+        ------
+        ValueError
+            If not 0 <= q_lower <= q_upper <= 100.
+        """
+        X = check_array(X, copy=True, estimator=self, dtype=FLOAT_DTYPES,
+                        force_all_finite=True)
+
+        if not 0 <= self.q_lower <= self.q_upper <= 100:
+            raise ValueError("Invalid quantile parameter values: "
+                             "q_lower %s, q_upper: %s"
+                             % (str(self.q_lower), str(self.q_upper)))
+
+        # TODO sparse data
+        # nanpercentile is called without an axis, so the statistics are
+        # taken over all entries of X rather than per feature
+        quantiles = nanpercentile(X, (self.q_lower, self.q_upper))
+        iqr = quantiles[1] - quantiles[0]
+
+        self.q_lower_ = quantiles[0]
+        self.q_upper_ = quantiles[1]
+        self.iqr_ = _handle_zeros_in_scale(iqr, copy=False)
+
+        self.max_ = np.nanmax(X)
+        self.min_ = np.nanmin(X)
+
+        return self
+
+    def transform(self, X):
+        """
+        Clip and rescale `X` to match the training distribution
+
+        Parameters
+        ----------
+        X : array-like
+            The data to scale. It is validated with `check_array`
+            without `accept_sparse`; sparse support is still a TODO.
+
+        Returns
+        -------
+        X : the scaled copy of the input
+        """
+        # The attributes must be passed as one list: a second positional
+        # string would be taken as the error message, not as an attribute.
+        check_is_fitted(self, ['iqr_', 'max_'])
+        X = check_array(X, copy=True, estimator=self, dtype=FLOAT_DTYPES,
+                        force_all_finite=True)
+
+        # TODO sparse data
+        # distance of the training extremes from the training
+        # percentiles, expressed in units of the training range
+        train_upper_scale = (self.max_ - self.q_upper_) / self.iqr_
+        train_lower_scale = (self.q_lower_ - self.min_) / self.iqr_
+
+        test_quantiles = nanpercentile(X, (self.q_lower, self.q_upper))
+        test_iqr = _handle_zeros_in_scale(
+            test_quantiles[1] - test_quantiles[0], copy=False)
+
+        test_upper_bound = test_quantiles[1] + train_upper_scale * test_iqr
+        test_lower_bound = test_quantiles[0] - train_lower_scale * test_iqr
+
+        # the lower bound never goes below the smallest observed value
+        test_min = np.nanmin(X)
+        if test_lower_bound < test_min:
+            test_lower_bound = test_min
+
+        # clip to the bounds (X is a copy, see check_array above)
+        X[X > test_upper_bound] = test_upper_bound
+        X[X < test_lower_bound] = test_lower_bound
+
+        # map [test_lower_bound, test_upper_bound] onto [min_, max_]
+        X = (X - test_lower_bound) / (test_upper_bound - test_lower_bound)\
+            * (self.max_ - self.min_) + self.min_
+
+        return X
+
+    def inverse_transform(self, X):
+        """
+        Scale the data back to the original state
+
+        Raises
+        ------
+        NotImplementedError
+            Always; the inverse transformation is not available.
+        """
+        raise NotImplementedError("Inverse transformation is not implemented!")

diff -r 9d234733ccfd -r abb5a3f256e3 search_model_validation.py
--- a/search_model_validation.py Sun Dec 30 01:55:30 2018 -0500
+++ b/search_model_validation.py Tue May 14 18:11:02 2019 -0400

[

b'@@ -1,7 +1,8 @@\n+import argparse\n+import collections\n import imblearn\n import json\n import numpy as np\n-import os\n import pandas\n import pickle\n import skrebate\n@@ -9,93 +10,124 @@\n import sys\n import xgboost\n import warnings\n+import iraps_classifier\n+import model_validations\n+import preprocessors\n+import feature_selectors\n from imblearn import under_sampling, over_sampling, combine\n-from imblearn.pipeline import Pipeline as imbPipeline\n-from sklearn import (cluster, compose, decomposition, ensemble, feature_extraction,\n- feature_selection, gaussian_process, kernel_approximation, metrics,\n- model_selection, naive_bayes, neighbors, pipeline, preprocessing,\n- svm, linear_model, tree, discriminant_analysis)\n+from scipy.io import mmread\n+from mlxtend import classifier, regressor\n+from sklearn import (cluster, compose, decomposition, ensemble,\n+ feature_extraction, feature_selection,\n+ gaussian_process, kernel_approximation, metrics,\n+ model_selection, naive_bayes, neighbors,\n+ pipeline, preprocessing, svm, linear_model,\n+ tree, discriminant_analysis)\n from sklearn.exceptions import FitFailedWarning\n from sklearn.externals import joblib\n-from utils import get_cv, get_scoring, get_X_y, load_model, read_columns, SafeEval\n+from sklearn.model_selection._validation import _score\n+\n+from utils import (SafeEval, get_cv, get_scoring, get_X_y,\n+ load_model, read_columns)\n+from model_validations import train_test_split\n \n \n-N_JOBS = int(os.environ.get(\'GALAXY_SLOTS\', 1))\n+N_JOBS = int(__import__(\'os\').environ.get(\'GALAXY_SLOTS\', 1))\n+CACHE_DIR = \'./cached\'\n+NON_SEARCHABLE = (\'n_jobs\', \'pre_dispatch\', \'memory\', \'steps\',\n+ \'nthread\', \'verbose\')\n \n \n-def get_search_params(params_builder):\n+def _eval_search_params(params_builder):\n search_params = {}\n- safe_eval = SafeEval(load_scipy=True, load_numpy=True)\n- safe_eval_es = SafeEval(load_estimators=True)\n \n for p in params_builder[\'param_set\']:\n- search_p 
= p[\'search_param_selector\'][\'search_p\']\n- if search_p.strip() == \'\':\n+ search_list = p[\'sp_list\'].strip()\n+ if search_list == \'\':\n continue\n- param_type = p[\'search_param_selector\'][\'selected_param_type\']\n+\n+ param_name = p[\'sp_name\']\n+ if param_name.lower().endswith(NON_SEARCHABLE):\n+ print("Warning: `%s` is not eligible for search and was "\n+ "omitted!" % param_name)\n+ continue\n \n- lst = search_p.split(\':\')\n- assert (len(lst) == 2), "Error, make sure there is one and only one colon in search parameter input."\n- literal = lst[1].strip()\n- param_name = lst[0].strip()\n- if param_name:\n- if param_name.lower() == \'n_jobs\':\n- sys.exit("Parameter `%s` is invalid for search." %param_name)\n- elif not param_name.endswith(\'-\'):\n- ev = safe_eval(literal)\n- if param_type == \'final_estimator_p\':\n- search_params[\'estimator__\' + param_name] = ev\n- else:\n- search_params[\'preprocessing_\' + param_type[5:6] + \'__\' + param_name] = ev\n- else:\n- # only for estimator eval, add `-` to the end of param\n- #TODO maybe add regular express check\n- ev = safe_eval_es(literal)\n- for obj in ev:\n- if \'n_jobs\' in obj.get_params():\n- obj.set_params( n_jobs=N_JOBS )\n- if param_type == \'final_estimator_p\':\n- search_params[\'estimator__\' + param_name[:-1]] = ev\n- else:\n- search_params[\'preprocessing_\' + param_type[5:6] + \'__\' + param_name[:-1]] = ev\n- elif param_type != \'final_estimator_p\':\n- #TODO regular express check ?\n- '..b'_train_test_split == \'yes\':\n+ # make sure refit is choosen\n+ if not options[\'refit\']:\n+ raise ValueError("Refit must be `True` for shuffle splitting!")\n+ split_options = params[\'train_test_split\']\n+\n+ # splits\n+ if split_options[\'shuffle\'] == \'stratified\':\n+ split_options[\'labels\'] = y\n+ X, X_test, y, y_test = train_test_split(X, y, **split_options)\n+ elif split_options[\'shuffle\'] == \'group\':\n+ if not groups:\n+ raise ValueError("No group based CV option was "\n+ "choosen 
for group shuffle!")\n+ split_options[\'labels\'] = groups\n+ X, X_test, y, y_test, groups, _ =\\\n+ train_test_split(X, y, **split_options)\n+ else:\n+ if split_options[\'shuffle\'] == \'None\':\n+ split_options[\'shuffle\'] = None\n+ X, X_test, y, y_test =\\\n+ train_test_split(X, y, **split_options)\n+ # end train_test_split\n \n if options[\'error_score\'] == \'raise\':\n- searcher.fit(X, y)\n+ searcher.fit(X, y, groups=groups)\n else:\n warnings.simplefilter(\'always\', FitFailedWarning)\n with warnings.catch_warnings(record=True) as w:\n try:\n- searcher.fit(X, y)\n+ searcher.fit(X, y, groups=groups)\n except ValueError:\n pass\n for warning in w:\n print(repr(warning.message))\n \n- cv_result = pandas.DataFrame(searcher.cv_results_)\n- cv_result.rename(inplace=True, columns={\'mean_test_primary\': \'mean_test_\'+primary_scoring, \'rank_test_primary\': \'rank_test_\'+primary_scoring})\n- cv_result.to_csv(path_or_buf=outfile_result, sep=\'\\t\', header=True, index=False)\n+ if do_train_test_split == \'no\':\n+ # save results\n+ cv_results = pandas.DataFrame(searcher.cv_results_)\n+ cv_results = cv_results[sorted(cv_results.columns)]\n+ cv_results.to_csv(path_or_buf=outfile_result, sep=\'\\t\',\n+ header=True, index=False)\n+\n+ # output test result using best_estimator_\n+ else:\n+ best_estimator_ = searcher.best_estimator_\n+ if isinstance(options[\'scoring\'], collections.Mapping):\n+ is_multimetric = True\n+ else:\n+ is_multimetric = False\n \n- if outfile_estimator:\n- with open(outfile_estimator, \'wb\') as output_handler:\n- pickle.dump(searcher.best_estimator_, output_handler, pickle.HIGHEST_PROTOCOL)\n+ test_score = _score(best_estimator_, X_test,\n+ y_test, options[\'scoring\'],\n+ is_multimetric=is_multimetric)\n+ if not is_multimetric:\n+ test_score = {primary_scoring: test_score}\n+ for key, value in test_score.items():\n+ test_score[key] = [value]\n+ result_df = pandas.DataFrame(test_score)\n+ result_df.to_csv(path_or_buf=outfile_result, 
sep=\'\\t\',\n+ header=True, index=False)\n+\n+ memory.clear(warn=False)\n+\n+ if outfile_object:\n+ with open(outfile_object, \'wb\') as output_handler:\n+ pickle.dump(searcher, output_handler, pickle.HIGHEST_PROTOCOL)\n+\n+\n+if __name__ == \'__main__\':\n+ aparser = argparse.ArgumentParser()\n+ aparser.add_argument("-i", "--inputs", dest="inputs", required=True)\n+ aparser.add_argument("-e", "--estimator", dest="infile_estimator")\n+ aparser.add_argument("-X", "--infile1", dest="infile1")\n+ aparser.add_argument("-y", "--infile2", dest="infile2")\n+ aparser.add_argument("-r", "--outfile_result", dest="outfile_result")\n+ aparser.add_argument("-o", "--outfile_object", dest="outfile_object")\n+ aparser.add_argument("-g", "--groups", dest="groups")\n+ args = aparser.parse_args()\n+\n+ main(args.inputs, args.infile_estimator, args.infile1, args.infile2,\n+ args.outfile_result, outfile_object=args.outfile_object,\n+ groups=args.groups)\n'

diff -r 9d234733ccfd -r abb5a3f256e3 sk_whitelist.json
--- a/sk_whitelist.json Sun Dec 30 01:55:30 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,761 +0,0 @@\n-{ "SK_NAMES": [\n- "sklearn._ASSUME_FINITE", "sklearn._isotonic._inplace_contiguous_isotonic_regression",\n- "sklearn._isotonic._make_unique", "sklearn.base.BaseEstimator",\n- "sklearn.base.BiclusterMixin", "sklearn.base.ClassifierMixin",\n- "sklearn.base.ClusterMixin", "sklearn.base.DensityMixin",\n- "sklearn.base.MetaEstimatorMixin", "sklearn.base.RegressorMixin",\n- "sklearn.base.TransformerMixin", "sklearn.base._first_and_last_element",\n- "sklearn.base._pprint", "sklearn.base.clone",\n- "sklearn.base.is_classifier", "sklearn.base.is_regressor",\n- "sklearn.clone", "sklearn.cluster.AffinityPropagation",\n- "sklearn.cluster.AgglomerativeClustering", "sklearn.cluster.Birch",\n- "sklearn.cluster.DBSCAN", "sklearn.cluster.FeatureAgglomeration",\n- "sklearn.cluster.KMeans", "sklearn.cluster.MeanShift",\n- "sklearn.cluster.MiniBatchKMeans", "sklearn.cluster.SpectralBiclustering",\n- "sklearn.cluster.SpectralClustering", "sklearn.cluster.SpectralCoclustering",\n- "sklearn.cluster._dbscan_inner.dbscan_inner", "sklearn.cluster._feature_agglomeration.AgglomerationTransform",\n- "sklearn.cluster._hierarchical.WeightedEdge", "sklearn.cluster._hierarchical._get_parents",\n- "sklearn.cluster._hierarchical._hc_get_descendent", "sklearn.cluster._hierarchical.average_merge",\n- "sklearn.cluster._hierarchical.compute_ward_dist", "sklearn.cluster._hierarchical.hc_get_heads",\n- "sklearn.cluster._hierarchical.max_merge", "sklearn.cluster._k_means._assign_labels_array",\n- "sklearn.cluster._k_means._assign_labels_csr", "sklearn.cluster._k_means._centers_dense",\n- "sklearn.cluster._k_means._centers_sparse", "sklearn.cluster._k_means._mini_batch_update_csr",\n- "sklearn.cluster._k_means_elkan.k_means_elkan", "sklearn.cluster.affinity_propagation",\n- "sklearn.cluster.affinity_propagation_.AffinityPropagation", "sklearn.cluster.affinity_propagation_.affinity_propagation",\n- "sklearn.cluster.bicluster.BaseSpectral", 
"sklearn.cluster.bicluster.SpectralBiclustering",\n- "sklearn.cluster.bicluster.SpectralCoclustering", "sklearn.cluster.bicluster._bistochastic_normalize",\n- "sklearn.cluster.bicluster._log_normalize", "sklearn.cluster.bicluster._scale_normalize",\n- "sklearn.cluster.birch.Birch", "sklearn.cluster.birch._CFNode",\n- "sklearn.cluster.birch._CFSubcluster", "sklearn.cluster.birch._iterate_sparse_X",\n- "sklearn.cluster.birch._split_node", "sklearn.cluster.dbscan",\n- "sklearn.cluster.dbscan_.DBSCAN", "sklearn.cluster.dbscan_.dbscan",\n- "sklearn.cluster.estimate_bandwidth", "sklearn.cluster.get_bin_seeds",\n- "sklearn.cluster.hierarchical.AgglomerativeClustering", "sklearn.cluster.hierarchical.FeatureAgglomeration",\n- "sklearn.cluster.hierarchical._TREE_BUILDERS", "sklearn.cluster.hierarchical._average_linkage",\n- "sklearn.cluster.hierarchical._complete_linkage", "sklearn.cluster.hierarchical._fix_connectivity",\n- "sklearn.cluster.hierarchical._hc_cut", "sklearn.cluster.hierarchical.linkage_tree",\n- "sklearn.cluster.hierarchical.ward_tree", "sklearn.cluster.k_means",\n- "sklearn.cluster.k_means_.FLOAT_DTYPES", "sklearn.cluster.k_means_.KMeans",\n- "sklearn.cluster.k_means_.MiniBatchKMeans", "sklearn.cluster.k_means_._init_centroids",\n- "sklearn.cluster.k_means_._k_init", "sklearn.cluster.k_means_._kmeans_single_elkan",\n- "sklearn.cluster.k_means_._kmeans_single_lloyd", "sklearn.cluster.k_means_._labels_inertia",\n- "sklearn.cluster.k_means_._labels_inertia_precompute_dense", "sklearn.cluster.k_means_._mini_batch_convergence",\n- "sklearn.cluster.k_means_._mini_batch_step", "sklearn.cluster.k_means_._tolerance",\n- "sklearn.cluster.k_means_._validate_center_shape", "sklearn.cluster.k_means_.k_means",\n- "sklearn.cluster.k_means_.string_types", "sklearn.cluster.linkage_tree",\n- "sklearn.cluster.mean_shift", "sklearn.cluster.mean_shift_.MeanShift",\n- "sklearn.cluster.mean_shift_._mean_shift_single_seed", "sklearn.cluster'..b'lidation.check_non_negative", 
"sklearn.utils.validation.check_random_state",\n- "sklearn.utils.validation.check_symmetric", "sklearn.utils.validation.column_or_1d",\n- "sklearn.utils.validation.has_fit_parameter", "sklearn.utils.validation.indexable",\n- "sklearn.utils.weight_vector.WeightVector"\n-],\n-\n- "SKR_NAMES": [\n- "skrebate.MultiSURF", "skrebate.MultiSURFstar",\n- "skrebate.ReliefF", "skrebate.SURF",\n- "skrebate.SURFstar", "skrebate.TuRF",\n- "skrebate.multisurf.MultiSURF", "skrebate.multisurfstar.MultiSURFstar",\n- "skrebate.relieff.ReliefF", "skrebate.scoring_utils.MultiSURF_compute_scores",\n- "skrebate.scoring_utils.MultiSURFstar_compute_scores", "skrebate.scoring_utils.ReliefF_compute_scores",\n- "skrebate.scoring_utils.SURF_compute_scores", "skrebate.scoring_utils.SURFstar_compute_scores",\n- "skrebate.scoring_utils.compute_score", "skrebate.scoring_utils.get_row_missing",\n- "skrebate.scoring_utils.ramp_function", "skrebate.surf.SURF",\n- "skrebate.surfstar.SURFstar", "skrebate.turf.TuRF"\n- ],\n-\n- "XGB_NAMES": [\n- "xgboost.Booster", "xgboost.DMatrix",\n- "xgboost.VERSION_FILE", "xgboost.XGBClassifier",\n- "xgboost.XGBModel", "xgboost.XGBRegressor",\n- "xgboost.callback._fmt_metric", "xgboost.callback._get_callback_context",\n- "xgboost.callback.early_stop", "xgboost.callback.print_evaluation",\n- "xgboost.callback.record_evaluation", "xgboost.callback.reset_learning_rate",\n- "xgboost.compat.PANDAS_INSTALLED", "xgboost.compat.PY3",\n- "xgboost.compat.SKLEARN_INSTALLED", "xgboost.compat.STRING_TYPES",\n- "xgboost.compat.py_str", "xgboost.core.Booster",\n- "xgboost.core.CallbackEnv", "xgboost.core.DMatrix",\n- "xgboost.core.EarlyStopException", "xgboost.core.PANDAS_DTYPE_MAPPER",\n- "xgboost.core.PANDAS_INSTALLED", "xgboost.core.PY3",\n- "xgboost.core.STRING_TYPES", "xgboost.core.XGBoostError",\n- "xgboost.core._check_call", "xgboost.core._load_lib",\n- "xgboost.core._maybe_pandas_data", "xgboost.core._maybe_pandas_label",\n- "xgboost.core.c_array", "xgboost.core.c_str",\n- 
"xgboost.core.ctypes2buffer", "xgboost.core.ctypes2numpy",\n- "xgboost.core.from_cstr_to_pystr", "xgboost.core.from_pystr_to_cstr",\n- "xgboost.cv", "xgboost.f",\n- "xgboost.libpath.XGBoostLibraryNotFound", "xgboost.libpath.find_lib_path",\n- "xgboost.plot_importance", "xgboost.plot_tree",\n- "xgboost.plotting._EDGEPAT", "xgboost.plotting._EDGEPAT2",\n- "xgboost.plotting._LEAFPAT", "xgboost.plotting._NODEPAT",\n- "xgboost.plotting._parse_edge", "xgboost.plotting._parse_node",\n- "xgboost.plotting.plot_importance", "xgboost.plotting.plot_tree",\n- "xgboost.plotting.to_graphviz", "xgboost.rabit.DTYPE_ENUM__",\n- "xgboost.rabit.STRING_TYPES", "xgboost.rabit._init_rabit",\n- "xgboost.rabit.allreduce", "xgboost.rabit.broadcast",\n- "xgboost.rabit.finalize", "xgboost.rabit.get_processor_name",\n- "xgboost.rabit.get_rank", "xgboost.rabit.get_world_size",\n- "xgboost.rabit.init", "xgboost.rabit.tracker_print",\n- "xgboost.rabit.version_number", "xgboost.sklearn.SKLEARN_INSTALLED",\n- "xgboost.sklearn.XGBClassifier", "xgboost.sklearn.XGBModel",\n- "xgboost.sklearn.XGBRegressor", "xgboost.sklearn._objective_decorator",\n- "xgboost.to_graphviz", "xgboost.train",\n- "xgboost.training.CVPack", "xgboost.training.SKLEARN_INSTALLED",\n- "xgboost.training.STRING_TYPES", "xgboost.training._train_internal",\n- "xgboost.training.aggcv", "xgboost.training.cv",\n- "xgboost.training.mknfold", "xgboost.training.train"\n- ],\n-\n-\n- "NUMPY_NAMES": [\n- "numpy.core.multiarray._reconstruct", "numpy.ndarray",\n- "numpy.dtype", "numpy.core.multiarray.scalar",\n- "numpy.random.__RandomState_ctor"\n- ],\n-\n- "IMBLEARN_NAMES":[\n- "imblearn.pipeline.Pipeline", "imblearn.over_sampling._random_over_sampler.RandomOverSampler",\n- "imblearn.under_sampling._prototype_selection._edited_nearest_neighbours.EditedNearestNeighbours"\n- ]\n-}\n\\ No newline at end of file\n'

diff -r 9d234733ccfd -r abb5a3f256e3 stacking_ensembles.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/stacking_ensembles.py Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,128 @@
+import argparse
+import json
+import pandas as pd
+import pickle
+import xgboost
+import warnings
+from sklearn import (cluster, compose, decomposition, ensemble,
+                     feature_extraction, feature_selection,
+                     gaussian_process, kernel_approximation, metrics,
+                     model_selection, naive_bayes, neighbors,
+                     pipeline, preprocessing, svm, linear_model,
+                     tree, discriminant_analysis)
+from sklearn.model_selection._split import check_cv
+from feature_selectors import (DyRFE, DyRFECV,
+                               MyPipeline, MyimbPipeline)
+from iraps_classifier import (IRAPSCore, IRAPSClassifier,
+                              BinarizeTargetClassifier,
+                              BinarizeTargetRegressor)
+from preprocessors import Z_RandomOverSampler
+from utils import load_model, get_cv, get_estimator, get_search_params
+
+from mlxtend.regressor import StackingCVRegressor, StackingRegressor
+from mlxtend.classifier import StackingCVClassifier, StackingClassifier
+
+
+warnings.filterwarnings('ignore')
+
+N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
+
+
+def main(inputs_path, output_obj, base_paths=None, meta_path=None,
+         outfile_params=None):
+    """
+    Build a stacking ensemble estimator and pickle it to `output_obj`
+
+    Parameters
+    ----------
+    inputs_path : str
+        File path for Galaxy parameters
+
+    output_obj : str
+        File path for ensemble estimator output
+
+    base_paths : str or None
+        File path or paths concatenated by comma. An empty entry or the
+        string 'None' means the base estimator at that position is built
+        from the Galaxy parameters. If `base_paths` itself is None, every
+        base estimator is built from the Galaxy parameters.
+
+    meta_path : str
+        File path of the meta estimator; if not given, the meta
+        estimator is built from the Galaxy parameters.
+
+    outfile_params : str
+        File path for params output
+    """
+    with open(inputs_path, 'r') as param_handler:
+        params = json.load(param_handler)
+
+    if base_paths is None:
+        # No files supplied (the declared default): previously this
+        # crashed on `None.split`. Build one estimator per builder entry.
+        base_files = [None] * len(params['base_est_builder'])
+    else:
+        base_files = base_paths.split(',')
+
+    base_estimators = []
+    for idx, base_file in enumerate(base_files):
+        if base_file and base_file != 'None':
+            with open(base_file, 'rb') as handler:
+                model = load_model(handler)
+        else:
+            estimator_json = (params['base_est_builder'][idx]
+                              ['estimator_selector'])
+            model = get_estimator(estimator_json)
+        base_estimators.append(model)
+
+    if meta_path:
+        with open(meta_path, 'rb') as f:
+            meta_estimator = load_model(f)
+    else:
+        estimator_json = params['meta_estimator']['estimator_selector']
+        meta_estimator = get_estimator(estimator_json)
+
+    options = params['algo_selection']['options']
+
+    cv_selector = options.pop('cv_selector', None)
+    if cv_selector:
+        # the second value returned by get_cv is not needed here
+        splitter, _ = get_cv(cv_selector)
+        options['cv'] = splitter
+        # set n_jobs
+        options['n_jobs'] = N_JOBS
+
+    estimator_type = params['algo_selection']['estimator_type']
+
+    if estimator_type == 'StackingCVClassifier':
+        ensemble_estimator = StackingCVClassifier(
+            classifiers=base_estimators,
+            meta_classifier=meta_estimator,
+            **options)
+
+    elif estimator_type == 'StackingClassifier':
+        ensemble_estimator = StackingClassifier(
+            classifiers=base_estimators,
+            meta_classifier=meta_estimator,
+            **options)
+
+    elif estimator_type == 'StackingCVRegressor':
+        ensemble_estimator = StackingCVRegressor(
+            regressors=base_estimators,
+            meta_regressor=meta_estimator,
+            **options)
+
+    else:
+        # any other type falls back to the plain stacking regressor
+        ensemble_estimator = StackingRegressor(
+            regressors=base_estimators,
+            meta_regressor=meta_estimator,
+            **options)
+
+    print(ensemble_estimator)
+    for base_est in base_estimators:
+        print(base_est)
+
+    with open(output_obj, 'wb') as out_handler:
+        pickle.dump(ensemble_estimator, out_handler, pickle.HIGHEST_PROTOCOL)
+
+    # optionally write the ensemble's parameters as a tab-separated table
+    if params['get_params'] and outfile_params:
+        results = get_search_params(ensemble_estimator)
+        df = pd.DataFrame(results, columns=['', 'Parameter', 'Value'])
+        df.to_csv(outfile_params, sep='\t', index=False)
+
+
+if __name__ == '__main__':
+    # Command line entry point: every option is a plain string stored
+    # under its `dest` name and handed over to `main`.
+    aparser = argparse.ArgumentParser()
+    for short_flag, long_flag, dest_name in (
+            ("-b", "--bases", "bases"),
+            ("-m", "--meta", "meta"),
+            ("-i", "--inputs", "inputs"),
+            ("-o", "--outfile", "outfile"),
+            ("-p", "--outfile_params", "outfile_params")):
+        aparser.add_argument(short_flag, long_flag, dest=dest_name)
+    args = aparser.parse_args()
+
+    main(args.inputs, args.outfile, base_paths=args.bases,
+         meta_path=args.meta, outfile_params=args.outfile_params)

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/GridSearchCV.zip

Binary file test-data/GridSearchCV.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/LinearRegression01.zip

Binary file test-data/LinearRegression01.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/LinearRegression02.zip

Binary file test-data/LinearRegression02.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/RFE.zip

Binary file test-data/RFE.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/RandomForestClassifier.zip

Binary file test-data/RandomForestClassifier.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/RandomForestRegressor01.zip

Binary file test-data/RandomForestRegressor01.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/StackingCVRegressor01.zip

Binary file test-data/StackingCVRegressor01.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/StackingCVRegressor02.zip

Binary file test-data/StackingCVRegressor02.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/XGBRegressor01.zip

Binary file test-data/XGBRegressor01.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/best_estimator_.zip

Binary file test-data/best_estimator_.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/best_params_.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/best_params_.txt Tue May 14 18:11:02 2019 -0400

@@ -0,0 +1,1 @@
+{'estimator__n_estimators': 100}
\ No newline at end of file

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/best_score_.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/best_score_.tabular Tue May 14 18:11:02 2019 -0400

@@ -0,0 +1,2 @@
+best_score_
+0.7976348550293088

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/feature_importances_.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feature_importances_.tabular Tue May 14 18:11:02 2019 -0400

@@ -0,0 +1,11 @@
+feature_importances_
+0.15959252
+0.20373514
+0.22071308
+0.06281833
+0.098471984
+0.06960951
+0.13073005
+0.027164686
+0.022071308
+0.0050933785

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/feature_selection_result13
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feature_selection_result13 Tue May 14 18:11:02 2019 -0400

@@ -0,0 +1,262 @@
+temp_1 average forecast_noaa friend
+69.0 69.7 65.0 88.0
+59.0 58.1 57.0 66.0
+88.0 77.3 75.0 70.0
+65.0 64.7 63.0 58.0
+50.0 47.5 44.0 58.0
+51.0 48.2 45.0 63.0
+52.0 48.6 45.0 41.0
+78.0 76.7 75.0 66.0
+35.0 45.2 43.0 38.0
+40.0 46.1 45.0 36.0
+47.0 45.3 41.0 58.0
+72.0 76.3 76.0 88.0
+76.0 74.4 73.0 72.0
+39.0 45.3 45.0 46.0
+78.0 72.2 70.0 84.0
+71.0 67.3 63.0 85.0
+48.0 47.7 44.0 61.0
+72.0 77.0 77.0 68.0
+57.0 54.7 50.0 70.0
+40.0 45.1 44.0 39.0
+54.0 47.6 47.0 53.0
+58.0 53.2 52.0 71.0
+68.0 58.6 58.0 54.0
+65.0 55.3 55.0 65.0
+47.0 48.8 46.0 51.0
+44.0 45.6 43.0 42.0
+64.0 67.1 64.0 69.0
+62.0 57.1 57.0 67.0
+66.0 65.7 64.0 74.0
+70.0 71.8 67.0 90.0
+57.0 54.2 54.0 70.0
+50.0 50.5 46.0 57.0
+55.0 51.8 49.0 71.0
+55.0 49.5 46.0 67.0
+42.0 45.2 41.0 47.0
+65.0 60.1 57.0 41.0
+63.0 65.6 63.0 73.0
+48.0 47.3 45.0 28.0
+42.0 46.3 44.0 62.0
+51.0 46.2 45.0 38.0
+64.0 68.0 65.0 64.0
+75.0 74.6 74.0 63.0
+52.0 46.7 42.0 39.0
+67.0 68.6 66.0 80.0
+68.0 68.7 65.0 56.0
+54.0 55.0 53.0 42.0
+62.0 56.8 52.0 70.0
+76.0 76.1 76.0 61.0
+73.0 73.1 71.0 93.0
+52.0 50.3 50.0 35.0
+70.0 73.9 71.0 68.0
+77.0 77.4 75.0 62.0
+60.0 56.6 52.0 72.0
+52.0 53.3 50.0 54.0
+79.0 75.0 71.0 85.0
+76.0 57.2 53.0 74.0
+66.0 66.5 64.0 85.0
+57.0 61.8 58.0 62.0
+66.0 57.4 57.0 60.0
+61.0 58.4 58.0 41.0
+55.0 53.1 52.0 65.0
+48.0 48.1 46.0 54.0
+49.0 49.2 46.0 63.0
+65.0 66.7 64.0 73.0
+60.0 62.5 58.0 56.0
+56.0 53.0 53.0 36.0
+59.0 57.4 56.0 44.0
+44.0 45.7 41.0 35.0
+82.0 63.2 62.0 83.0
+64.0 67.0 65.0 76.0
+43.0 45.5 41.0 46.0
+64.0 55.7 51.0 57.0
+63.0 52.7 49.0 49.0
+70.0 70.6 67.0 79.0
+71.0 52.4 48.0 42.0
+76.0 73.5 69.0 85.0
+68.0 62.1 58.0 55.0
+39.0 45.3 44.0 39.0
+71.0 70.7 70.0 52.0
+69.0 71.7 68.0 89.0
+74.0 71.5 71.0 82.0
+81.0 64.1 62.0 81.0
+51.0 49.3 49.0 34.0
+45.0 46.8 44.0 61.0
+87.0 76.8 73.0 73.0
+71.0 73.8 71.0 86.0
+55.0 60.3 56.0 77.0
+80.0 76.9 72.0 81.0
+67.0 69.0 65.0 76.0
+61.0 61.4 60.0 78.0
+46.0 46.6 43.0 65.0
+39.0 45.1 42.0 51.0
+67.0 68.3 67.0 61.0
+52.0 47.8 43.0 50.0
+67.0 69.8 68.0 87.0
+75.0 71.2 67.0 77.0
+68.0 73.3 73.0 79.0
+92.0 68.2 65.0 71.0
+67.0 72.8 69.0 56.0
+44.0 45.8 43.0 56.0
+61.0 61.0 56.0 73.0
+65.0 53.4 49.0 41.0
+68.0 73.0 72.0 70.0
+87.0 62.1 62.0 69.0
+117.0 54.8 51.0 62.0
+80.0 76.4 75.0 66.0
+57.0 51.0 47.0 46.0
+67.0 63.6 61.0 68.0
+58.0 54.0 51.0 56.0
+65.0 56.2 53.0 41.0
+52.0 48.6 45.0 47.0
+59.0 55.3 52.0 39.0
+57.0 53.9 53.0 35.0
+81.0 59.2 56.0 66.0
+75.0 77.1 76.0 75.0
+76.0 77.4 76.0 95.0
+57.0 64.8 61.0 53.0
+69.0 74.2 72.0 86.0
+77.0 66.8 66.0 64.0
+55.0 49.9 47.0 55.0
+49.0 46.8 45.0 53.0
+54.0 52.7 48.0 57.0
+55.0 51.2 49.0 42.0
+56.0 55.6 53.0 45.0
+68.0 74.6 72.0 77.0
+54.0 53.4 49.0 44.0
+67.0 69.0 69.0 87.0
+49.0 46.9 45.0 33.0
+49.0 49.1 47.0 45.0
+56.0 48.5 48.0 49.0
+73.0 71.0 66.0 78.0
+66.0 66.4 65.0 60.0
+69.0 66.5 66.0 62.0
+82.0 64.5 64.0 65.0
+90.0 76.7 75.0 65.0
+51.0 50.7 49.0 43.0
+77.0 57.1 57.0 41.0
+60.0 61.4 58.0 58.0
+74.0 72.8 71.0 87.0
+85.0 77.2 73.0 74.0
+68.0 62.8 61.0 64.0
+56.0 49.5 46.0 37.0
+71.0 56.2 55.0 45.0
+62.0 59.5 57.0 40.0
+83.0 77.3 76.0 76.0
+64.0 65.4 62.0 56.0
+56.0 48.4 45.0 54.0
+41.0 45.1 42.0 31.0
+65.0 66.2 66.0 67.0
+65.0 53.7 49.0 38.0
+40.0 46.0 46.0 41.0
+45.0 45.6 43.0 29.0
+52.0 48.4 48.0 58.0
+63.0 51.7 50.0 63.0
+52.0 47.6 47.0 44.0
+60.0 57.9 55.0 77.0
+81.0 75.7 73.0 89.0
+75.0 75.8 74.0 77.0
+59.0 51.4 48.0 64.0
+73.0 77.1 77.0 94.0
+75.0 77.3 73.0 66.0
+60.0 58.5 56.0 59.0
+75.0 71.3 68.0 56.0
+59.0 57.6 56.0 40.0
+53.0 49.1 47.0 56.0
+79.0 77.2 76.0 60.0
+57.0 52.1 49.0 46.0
+75.0 67.6 64.0 77.0
+71.0 69.4 67.0 81.0
+53.0 50.2 50.0 42.0
+46.0 48.8 48.0 56.0
+81.0 76.9 72.0 70.0
+49.0 48.9 47.0 29.0
+57.0 48.4 44.0 34.0
+60.0 58.8 54.0 53.0
+67.0 73.7 72.0 64.0
+61.0 64.1 62.0 60.0
+66.0 69.5 66.0 85.0
+64.0 51.9 50.0 55.0
+66.0 65.7 62.0 49.0
+64.0 52.2 52.0 49.0
+71.0 65.2 61.0 56.0
+75.0 63.8 62.0 60.0
+48.0 46.4 46.0 47.0
+53.0 52.5 48.0 70.0
+49.0 47.1 46.0 65.0
+85.0 68.5 67.0 81.0
+62.0 49.4 48.0 30.0
+50.0 47.0 42.0 58.0
+58.0 55.9 51.0 39.0
+72.0 77.2 74.0 95.0
+55.0 50.7 50.0 34.0
+74.0 72.3 70.0 91.0
+85.0 77.3 77.0 77.0
+73.0 77.3 77.0 93.0
+52.0 47.4 44.0 39.0
+67.0 67.6 64.0 62.0
+45.0 45.1 45.0 35.0
+46.0 47.2 46.0 41.0
+66.0 60.6 60.0 57.0
+71.0 77.0 75.0 86.0
+70.0 69.3 66.0 79.0
+58.0 49.9 46.0 53.0
+72.0 77.1 76.0 65.0
+74.0 75.4 74.0 71.0
+65.0 64.5 63.0 49.0
+77.0 58.8 55.0 39.0
+59.0 50.9 49.0 35.0
+45.0 45.7 41.0 61.0
+53.0 50.5 49.0 46.0
+53.0 54.9 54.0 72.0
+79.0 77.3 73.0 79.0
+49.0 49.0 44.0 44.0
+63.0 62.9 62.0 78.0
+69.0 56.5 54.0 45.0
+60.0 50.8 47.0 46.0
+64.0 62.5 60.0 73.0
+79.0 71.0 66.0 64.0
+55.0 47.0 43.0 58.0
+73.0 56.0 54.0 41.0
+60.0 59.1 57.0 62.0
+67.0 70.2 67.0 77.0
+42.0 45.2 45.0 58.0
+60.0 65.0 62.0 55.0
+57.0 49.8 47.0 30.0
+35.0 45.2 44.0 36.0
+75.0 70.3 66.0 84.0
+61.0 51.1 48.0 65.0
+51.0 50.6 46.0 59.0
+71.0 71.9 67.0 70.0
+74.0 75.3 74.0 71.0
+48.0 45.4 44.0 42.0
+74.0 74.9 70.0 60.0
+76.0 70.8 68.0 57.0
+58.0 51.6 47.0 37.0
+51.0 50.4 48.0 43.0
+72.0 72.6 68.0 78.0
+76.0 67.2 64.0 74.0
+52.0 47.9 47.0 60.0
+53.0 48.2 48.0 53.0
+65.0 69.1 65.0 83.0
+58.0 58.1 58.0 43.0
+77.0 75.6 74.0 56.0
+61.0 52.9 51.0 35.0
+67.0 65.3 64.0 54.0
+54.0 49.3 46.0 58.0
+79.0 67.4 65.0 58.0
+77.0 64.3 63.0 67.0
+71.0 67.7 64.0 55.0
+58.0 57.7 54.0 61.0
+68.0 55.9 55.0 56.0
+40.0 45.4 45.0 49.0
+80.0 77.3 75.0 71.0
+74.0 62.3 59.0 61.0
+57.0 45.5 42.0 57.0
+52.0 47.8 43.0 57.0
+71.0 75.1 71.0 95.0
+49.0 53.6 49.0 70.0
+89.0 59.0 59.0 61.0
+60.0 60.2 56.0 78.0
+59.0 58.3 58.0 40.0

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/final_estimator.zip

Binary file test-data/final_estimator.zip has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params.tabular Tue May 14 18:11:02 2019 -0400

@@ -0,0 +1,6 @@
+ Parameter Value
+@ copy_X copy_X: True
+@ fit_intercept fit_intercept: True
+* n_jobs n_jobs: 1
+@ normalize normalize: False
+ Note: @, params eligible for search in searchcv tool.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params01.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params01.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,30 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)), ('selectkbest', SelectKBest(k=10, score_func=<function f_classif at 0x111ef0158>)), ('svr', SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
+  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
+  tol=0.001, verbose=False))]"
+@ robustscaler "robustscaler: RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)"
+@ selectkbest selectkbest: SelectKBest(k=10, score_func=<function f_classif at 0x111ef0158>)
+@ svr "svr: SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
+  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
+  tol=0.001, verbose=False)"
+@ robustscaler__copy robustscaler__copy: True
+@ robustscaler__quantile_range robustscaler__quantile_range: (25.0, 75.0)
+@ robustscaler__with_centering robustscaler__with_centering: True
+@ robustscaler__with_scaling robustscaler__with_scaling: True
+@ selectkbest__k selectkbest__k: 10
+@ selectkbest__score_func selectkbest__score_func: <function f_classif at 0x111ef0158>
+@ svr__C svr__C: 1.0
+@ svr__cache_size svr__cache_size: 200
+@ svr__coef0 svr__coef0: 0.0
+@ svr__degree svr__degree: 3
+@ svr__epsilon svr__epsilon: 0.1
+@ svr__gamma svr__gamma: 'auto_deprecated'
+@ svr__kernel svr__kernel: 'linear'
+@ svr__max_iter svr__max_iter: -1
+@ svr__shrinking svr__shrinking: True
+@ svr__tol svr__tol: 0.001
+* svr__verbose svr__verbose: False
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params02.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params02.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,33 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)), ('lassocv', LassoCV(alphas=None, copy_X=True, cv='warn', eps=0.001, fit_intercept=True,
+    max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False,
+    precompute='auto', random_state=None, selection='cyclic', tol=0.0001,
+    verbose=False))]"
+@ robustscaler "robustscaler: RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)"
+@ lassocv "lassocv: LassoCV(alphas=None, copy_X=True, cv='warn', eps=0.001, fit_intercept=True,
+    max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False,
+    precompute='auto', random_state=None, selection='cyclic', tol=0.0001,
+    verbose=False)"
+@ robustscaler__copy robustscaler__copy: True
+@ robustscaler__quantile_range robustscaler__quantile_range: (25.0, 75.0)
+@ robustscaler__with_centering robustscaler__with_centering: True
+@ robustscaler__with_scaling robustscaler__with_scaling: True
+@ lassocv__alphas lassocv__alphas: None
+@ lassocv__copy_X lassocv__copy_X: True
+@ lassocv__cv lassocv__cv: 'warn'
+@ lassocv__eps lassocv__eps: 0.001
+@ lassocv__fit_intercept lassocv__fit_intercept: True
+@ lassocv__max_iter lassocv__max_iter: 1000
+@ lassocv__n_alphas lassocv__n_alphas: 100
+* lassocv__n_jobs lassocv__n_jobs: 1
+@ lassocv__normalize lassocv__normalize: False
+@ lassocv__positive lassocv__positive: False
+@ lassocv__precompute lassocv__precompute: 'auto'
+@ lassocv__random_state lassocv__random_state: None
+@ lassocv__selection lassocv__selection: 'cyclic'
+@ lassocv__tol lassocv__tol: 0.0001
+* lassocv__verbose lassocv__verbose: False
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params03.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params03.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,43 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)), ('xgbclassifier', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1))]"
+@ robustscaler "robustscaler: RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)"
+@ xgbclassifier "xgbclassifier: XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1)"
+@ robustscaler__copy robustscaler__copy: True
+@ robustscaler__quantile_range robustscaler__quantile_range: (25.0, 75.0)
+@ robustscaler__with_centering robustscaler__with_centering: True
+@ robustscaler__with_scaling robustscaler__with_scaling: True
+@ xgbclassifier__base_score xgbclassifier__base_score: 0.5
+@ xgbclassifier__booster xgbclassifier__booster: 'gbtree'
+@ xgbclassifier__colsample_bylevel xgbclassifier__colsample_bylevel: 1
+@ xgbclassifier__colsample_bytree xgbclassifier__colsample_bytree: 1
+@ xgbclassifier__gamma xgbclassifier__gamma: 0
+@ xgbclassifier__learning_rate xgbclassifier__learning_rate: 0.1
+@ xgbclassifier__max_delta_step xgbclassifier__max_delta_step: 0
+@ xgbclassifier__max_depth xgbclassifier__max_depth: 3
+@ xgbclassifier__min_child_weight xgbclassifier__min_child_weight: 1
+@ xgbclassifier__missing xgbclassifier__missing: nan
+@ xgbclassifier__n_estimators xgbclassifier__n_estimators: 100
+* xgbclassifier__n_jobs xgbclassifier__n_jobs: 1
+* xgbclassifier__nthread xgbclassifier__nthread: None
+@ xgbclassifier__objective xgbclassifier__objective: 'binary:logistic'
+@ xgbclassifier__random_state xgbclassifier__random_state: 0
+@ xgbclassifier__reg_alpha xgbclassifier__reg_alpha: 0
+@ xgbclassifier__reg_lambda xgbclassifier__reg_lambda: 1
+@ xgbclassifier__scale_pos_weight xgbclassifier__scale_pos_weight: 1
+@ xgbclassifier__seed xgbclassifier__seed: None
+@ xgbclassifier__silent xgbclassifier__silent: True
+@ xgbclassifier__subsample xgbclassifier__subsample: 1
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params04.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params04.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,39 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('selectfrommodel', SelectFromModel(estimator=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None),
+        max_features=None, norm_order=1, prefit=False, threshold=None)), ('linearsvc', LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
+     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
+     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
+     verbose=0))]"
+@ selectfrommodel "selectfrommodel: SelectFromModel(estimator=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None),
+        max_features=None, norm_order=1, prefit=False, threshold=None)"
+@ linearsvc "linearsvc: LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
+     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
+     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
+     verbose=0)"
+@ selectfrommodel__estimator__algorithm selectfrommodel__estimator__algorithm: 'SAMME.R'
+@ selectfrommodel__estimator__base_estimator selectfrommodel__estimator__base_estimator: None
+@ selectfrommodel__estimator__learning_rate selectfrommodel__estimator__learning_rate: 1.0
+@ selectfrommodel__estimator__n_estimators selectfrommodel__estimator__n_estimators: 50
+@ selectfrommodel__estimator__random_state selectfrommodel__estimator__random_state: None
+@ selectfrommodel__estimator "selectfrommodel__estimator: AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None)"
+@ selectfrommodel__max_features selectfrommodel__max_features: None
+@ selectfrommodel__norm_order selectfrommodel__norm_order: 1
+@ selectfrommodel__prefit selectfrommodel__prefit: False
+@ selectfrommodel__threshold selectfrommodel__threshold: None
+@ linearsvc__C linearsvc__C: 1.0
+@ linearsvc__class_weight linearsvc__class_weight: None
+@ linearsvc__dual linearsvc__dual: True
+@ linearsvc__fit_intercept linearsvc__fit_intercept: True
+@ linearsvc__intercept_scaling linearsvc__intercept_scaling: 1
+@ linearsvc__loss linearsvc__loss: 'squared_hinge'
+@ linearsvc__max_iter linearsvc__max_iter: 1000
+@ linearsvc__multi_class linearsvc__multi_class: 'ovr'
+@ linearsvc__penalty linearsvc__penalty: 'l2'
+@ linearsvc__random_state linearsvc__random_state: None
+@ linearsvc__tol linearsvc__tol: 0.0001
+* linearsvc__verbose linearsvc__verbose: 0
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params05.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params05.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,31 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('randomforestregressor', RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+           max_features='auto', max_leaf_nodes=None,
+           min_impurity_decrease=0.0, min_impurity_split=None,
+           min_samples_leaf=1, min_samples_split=2,
+           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
+           oob_score=False, random_state=42, verbose=0, warm_start=False))]"
+@ randomforestregressor "randomforestregressor: RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+           max_features='auto', max_leaf_nodes=None,
+           min_impurity_decrease=0.0, min_impurity_split=None,
+           min_samples_leaf=1, min_samples_split=2,
+           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
+           oob_score=False, random_state=42, verbose=0, warm_start=False)"
+@ randomforestregressor__bootstrap randomforestregressor__bootstrap: True
+@ randomforestregressor__criterion randomforestregressor__criterion: 'mse'
+@ randomforestregressor__max_depth randomforestregressor__max_depth: None
+@ randomforestregressor__max_features randomforestregressor__max_features: 'auto'
+@ randomforestregressor__max_leaf_nodes randomforestregressor__max_leaf_nodes: None
+@ randomforestregressor__min_impurity_decrease randomforestregressor__min_impurity_decrease: 0.0
+@ randomforestregressor__min_impurity_split randomforestregressor__min_impurity_split: None
+@ randomforestregressor__min_samples_leaf randomforestregressor__min_samples_leaf: 1
+@ randomforestregressor__min_samples_split randomforestregressor__min_samples_split: 2
+@ randomforestregressor__min_weight_fraction_leaf randomforestregressor__min_weight_fraction_leaf: 0.0
+@ randomforestregressor__n_estimators randomforestregressor__n_estimators: 100
+* randomforestregressor__n_jobs randomforestregressor__n_jobs: 1
+@ randomforestregressor__oob_score randomforestregressor__oob_score: False
+@ randomforestregressor__random_state randomforestregressor__random_state: 42
+* randomforestregressor__verbose randomforestregressor__verbose: 0
+@ randomforestregressor__warm_start randomforestregressor__warm_start: False
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params06.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params06.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,22 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
+  svd_solver='auto', tol=0.0, whiten=False)), ('adaboostregressor', AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+         n_estimators=50, random_state=None))]"
+@ pca "pca: PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
+  svd_solver='auto', tol=0.0, whiten=False)"
+@ adaboostregressor "adaboostregressor: AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+         n_estimators=50, random_state=None)"
+@ pca__copy pca__copy: True
+@ pca__iterated_power pca__iterated_power: 'auto'
+@ pca__n_components pca__n_components: None
+@ pca__random_state pca__random_state: None
+@ pca__svd_solver pca__svd_solver: 'auto'
+@ pca__tol pca__tol: 0.0
+@ pca__whiten pca__whiten: False
+@ adaboostregressor__base_estimator adaboostregressor__base_estimator: None
+@ adaboostregressor__learning_rate adaboostregressor__learning_rate: 1.0
+@ adaboostregressor__loss adaboostregressor__loss: 'linear'
+@ adaboostregressor__n_estimators adaboostregressor__n_estimators: 50
+@ adaboostregressor__random_state adaboostregressor__random_state: None
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params07.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params07.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,16 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('rbfsampler', RBFSampler(gamma=2.0, n_components=10, random_state=None)), ('adaboostclassifier', AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+ learning_rate=1.0, n_estimators=50, random_state=None))]"
+@ rbfsampler rbfsampler: RBFSampler(gamma=2.0, n_components=10, random_state=None)
+@ adaboostclassifier "adaboostclassifier: AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+ learning_rate=1.0, n_estimators=50, random_state=None)"
+@ rbfsampler__gamma rbfsampler__gamma: 2.0
+@ rbfsampler__n_components rbfsampler__n_components: 10
+@ rbfsampler__random_state rbfsampler__random_state: None
+@ adaboostclassifier__algorithm adaboostclassifier__algorithm: 'SAMME.R'
+@ adaboostclassifier__base_estimator adaboostclassifier__base_estimator: None
+@ adaboostclassifier__learning_rate adaboostclassifier__learning_rate: 1.0
+@ adaboostclassifier__n_estimators adaboostclassifier__n_estimators: 50
+@ adaboostclassifier__random_state adaboostclassifier__random_state: None
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params08.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params08.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,24 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('featureagglomeration', FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
+           connectivity=None, linkage='ward', memory=None, n_clusters=3,
+           pooling_func=<function mean at 0x1123f1620>)), ('adaboostclassifier', AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None))]"
+@ featureagglomeration "featureagglomeration: FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
+           connectivity=None, linkage='ward', memory=None, n_clusters=3,
+           pooling_func=<function mean at 0x1123f1620>)"
+@ adaboostclassifier "adaboostclassifier: AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None)"
+@ featureagglomeration__affinity featureagglomeration__affinity: 'euclidean'
+@ featureagglomeration__compute_full_tree featureagglomeration__compute_full_tree: 'auto'
+@ featureagglomeration__connectivity featureagglomeration__connectivity: None
+@ featureagglomeration__linkage featureagglomeration__linkage: 'ward'
+* featureagglomeration__memory featureagglomeration__memory: None
+@ featureagglomeration__n_clusters featureagglomeration__n_clusters: 3
+@ featureagglomeration__pooling_func featureagglomeration__pooling_func: <function mean at 0x1123f1620>
+@ adaboostclassifier__algorithm adaboostclassifier__algorithm: 'SAMME.R'
+@ adaboostclassifier__base_estimator adaboostclassifier__base_estimator: None
+@ adaboostclassifier__learning_rate adaboostclassifier__learning_rate: 1.0
+@ adaboostclassifier__n_estimators adaboostclassifier__n_estimators: 50
+@ adaboostclassifier__random_state adaboostclassifier__random_state: None
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params09.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params09.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,39 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('relieff', ReliefF(discrete_threshold=10, n_features_to_select=3, n_jobs=1,
+    n_neighbors=100, verbose=False)), ('randomforestregressor', RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+           max_features='auto', max_leaf_nodes=None,
+           min_impurity_decrease=0.0, min_impurity_split=None,
+           min_samples_leaf=1, min_samples_split=2,
+           min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+           oob_score=False, random_state=None, verbose=0, warm_start=False))]"
+@ relieff "relieff: ReliefF(discrete_threshold=10, n_features_to_select=3, n_jobs=1,
+    n_neighbors=100, verbose=False)"
+@ randomforestregressor "randomforestregressor: RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+           max_features='auto', max_leaf_nodes=None,
+           min_impurity_decrease=0.0, min_impurity_split=None,
+           min_samples_leaf=1, min_samples_split=2,
+           min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+           oob_score=False, random_state=None, verbose=0, warm_start=False)"
+@ relieff__discrete_threshold relieff__discrete_threshold: 10
+@ relieff__n_features_to_select relieff__n_features_to_select: 3
+* relieff__n_jobs relieff__n_jobs: 1
+@ relieff__n_neighbors relieff__n_neighbors: 100
+* relieff__verbose relieff__verbose: False
+@ randomforestregressor__bootstrap randomforestregressor__bootstrap: True
+@ randomforestregressor__criterion randomforestregressor__criterion: 'mse'
+@ randomforestregressor__max_depth randomforestregressor__max_depth: None
+@ randomforestregressor__max_features randomforestregressor__max_features: 'auto'
+@ randomforestregressor__max_leaf_nodes randomforestregressor__max_leaf_nodes: None
+@ randomforestregressor__min_impurity_decrease randomforestregressor__min_impurity_decrease: 0.0
+@ randomforestregressor__min_impurity_split randomforestregressor__min_impurity_split: None
+@ randomforestregressor__min_samples_leaf randomforestregressor__min_samples_leaf: 1
+@ randomforestregressor__min_samples_split randomforestregressor__min_samples_split: 2
+@ randomforestregressor__min_weight_fraction_leaf randomforestregressor__min_weight_fraction_leaf: 0.0
+@ randomforestregressor__n_estimators randomforestregressor__n_estimators: 'warn'
+* randomforestregressor__n_jobs randomforestregressor__n_jobs: 1
+@ randomforestregressor__oob_score randomforestregressor__oob_score: False
+@ randomforestregressor__random_state randomforestregressor__random_state: None
+* randomforestregressor__verbose randomforestregressor__verbose: 0
+@ randomforestregressor__warm_start randomforestregressor__warm_start: False
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params10.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params10.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,12 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('adaboostregressor', AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+ n_estimators=50, random_state=None))]"
+@ adaboostregressor "adaboostregressor: AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+ n_estimators=50, random_state=None)"
+@ adaboostregressor__base_estimator adaboostregressor__base_estimator: None
+@ adaboostregressor__learning_rate adaboostregressor__learning_rate: 1.0
+@ adaboostregressor__loss adaboostregressor__loss: 'linear'
+@ adaboostregressor__n_estimators adaboostregressor__n_estimators: 50
+@ adaboostregressor__random_state adaboostregressor__random_state: None
+ Note: @, params eligible for search in searchcv tool.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params11.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params11.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,46 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('editednearestneighbours', EditedNearestNeighbours(kind_sel='all', n_jobs=1, n_neighbors=3,
+            random_state=None, ratio=None, return_indices=False,
+            sampling_strategy='auto')), ('randomforestclassifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
+            max_depth=None, max_features='auto', max_leaf_nodes=None,
+            min_impurity_decrease=0.0, min_impurity_split=None,
+            min_samples_leaf=1, min_samples_split=2,
+            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+            oob_score=False, random_state=None, verbose=0,
+            warm_start=False))]"
+@ editednearestneighbours "editednearestneighbours: EditedNearestNeighbours(kind_sel='all', n_jobs=1, n_neighbors=3,
+            random_state=None, ratio=None, return_indices=False,
+            sampling_strategy='auto')"
+@ randomforestclassifier "randomforestclassifier: RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
+            max_depth=None, max_features='auto', max_leaf_nodes=None,
+            min_impurity_decrease=0.0, min_impurity_split=None,
+            min_samples_leaf=1, min_samples_split=2,
+            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+            oob_score=False, random_state=None, verbose=0,
+            warm_start=False)"
+@ editednearestneighbours__kind_sel editednearestneighbours__kind_sel: 'all'
+* editednearestneighbours__n_jobs editednearestneighbours__n_jobs: 1
+@ editednearestneighbours__n_neighbors editednearestneighbours__n_neighbors: 3
+@ editednearestneighbours__random_state editednearestneighbours__random_state: None
+@ editednearestneighbours__ratio editednearestneighbours__ratio: None
+@ editednearestneighbours__return_indices editednearestneighbours__return_indices: False
+@ editednearestneighbours__sampling_strategy editednearestneighbours__sampling_strategy: 'auto'
+@ randomforestclassifier__bootstrap randomforestclassifier__bootstrap: True
+@ randomforestclassifier__class_weight randomforestclassifier__class_weight: None
+@ randomforestclassifier__criterion randomforestclassifier__criterion: 'gini'
+@ randomforestclassifier__max_depth randomforestclassifier__max_depth: None
+@ randomforestclassifier__max_features randomforestclassifier__max_features: 'auto'
+@ randomforestclassifier__max_leaf_nodes randomforestclassifier__max_leaf_nodes: None
+@ randomforestclassifier__min_impurity_decrease randomforestclassifier__min_impurity_decrease: 0.0
+@ randomforestclassifier__min_impurity_split randomforestclassifier__min_impurity_split: None
+@ randomforestclassifier__min_samples_leaf randomforestclassifier__min_samples_leaf: 1
+@ randomforestclassifier__min_samples_split randomforestclassifier__min_samples_split: 2
+@ randomforestclassifier__min_weight_fraction_leaf randomforestclassifier__min_weight_fraction_leaf: 0.0
+@ randomforestclassifier__n_estimators randomforestclassifier__n_estimators: 'warn'
+* randomforestclassifier__n_jobs randomforestclassifier__n_jobs: 1
+@ randomforestclassifier__oob_score randomforestclassifier__oob_score: False
+@ randomforestclassifier__random_state randomforestclassifier__random_state: None
+* randomforestclassifier__verbose randomforestclassifier__verbose: 0
+@ randomforestclassifier__warm_start randomforestclassifier__warm_start: False
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/get_params12.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params12.tabular Tue May 14 18:11:02 2019 -0400

[

@@ -0,0 +1,47 @@
+ Parameter Value
+* memory memory: None
+* steps "steps: [('rfe', RFE(estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1),
+  n_features_to_select=None, step=1, verbose=0))]"
+@ rfe "rfe: RFE(estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1),
+  n_features_to_select=None, step=1, verbose=0)"
+@ rfe__estimator__base_score rfe__estimator__base_score: 0.5
+@ rfe__estimator__booster rfe__estimator__booster: 'gbtree'
+@ rfe__estimator__colsample_bylevel rfe__estimator__colsample_bylevel: 1
+@ rfe__estimator__colsample_bytree rfe__estimator__colsample_bytree: 1
+@ rfe__estimator__gamma rfe__estimator__gamma: 0
+@ rfe__estimator__learning_rate rfe__estimator__learning_rate: 0.1
+@ rfe__estimator__max_delta_step rfe__estimator__max_delta_step: 0
+@ rfe__estimator__max_depth rfe__estimator__max_depth: 3
+@ rfe__estimator__min_child_weight rfe__estimator__min_child_weight: 1
+@ rfe__estimator__missing rfe__estimator__missing: nan
+@ rfe__estimator__n_estimators rfe__estimator__n_estimators: 100
+* rfe__estimator__n_jobs rfe__estimator__n_jobs: 1
+* rfe__estimator__nthread rfe__estimator__nthread: None
+@ rfe__estimator__objective rfe__estimator__objective: 'reg:linear'
+@ rfe__estimator__random_state rfe__estimator__random_state: 0
+@ rfe__estimator__reg_alpha rfe__estimator__reg_alpha: 0
+@ rfe__estimator__reg_lambda rfe__estimator__reg_lambda: 1
+@ rfe__estimator__scale_pos_weight rfe__estimator__scale_pos_weight: 1
+@ rfe__estimator__seed rfe__estimator__seed: None
+@ rfe__estimator__silent rfe__estimator__silent: True
+@ rfe__estimator__subsample rfe__estimator__subsample: 1
+@ rfe__estimator "rfe__estimator: XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1)"
+@ rfe__n_features_to_select rfe__n_features_to_select: None
+@ rfe__step rfe__step: 1
+* rfe__verbose rfe__verbose: 0
+ Note: @, searchable params in searchcv too.

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/mv_result01.tabular
--- a/test-data/mv_result01.tabular Sun Dec 30 01:55:30 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,3 +0,0 @@
-0.9452947345848994
-0.9926363525448115
--0.4384003222944141

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/mv_result02.tabular
--- a/test-data/mv_result02.tabular Sun Dec 30 01:55:30 2018 -0500
+++ b/test-data/mv_result02.tabular Tue May 14 18:11:02 2019 -0400

@@ -1,10 +1,11 @@
-1.6957921248350636
--0.9248588846061156
--0.48640795813792376
-0.647707440306449
-0.32740690920811427
--0.8229559569886034
-1.2150108977866847
-0.14723254190255275
-0.6053186541119763
-0.3972102859168325
+Predicted
+1.578912095858962
+-1.199072894940544
+-0.7173258906076226
+0.3255908318822695
+0.21919344304093213
+-0.6841926371423699
+1.1144698671662865
+0.19379531649046616
+0.9405094785593062
+1.2581284896870837

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/mv_result03.tabular
--- a/test-data/mv_result03.tabular Sun Dec 30 01:55:30 2018 -0500
+++ b/test-data/mv_result03.tabular Tue May 14 18:11:02 2019 -0400

@@ -1,3 +1,6 @@
-0.9452947345848994
-0.9926363525448115
--0.4384003222944141
+train_sizes_abs mean_train_scores std_train_scores mean_test_scores std_test_scores
+17 0.9668700841937653 0.00277836829836518 0.7008862995946905 0.03857541198731935
+56 0.9730008602419361 0.006839342612121988 0.7963376762427242 0.004846330083938778
+95 0.9728783377589098 0.0037790183626530663 0.814592845745573 0.020457691766770824
+134 0.9739086338111185 0.001627343246847077 0.7985540571195479 0.03954641079310707
+174 0.9726218628287785 0.0032867750457225182 0.8152971572131146 0.04280261115004303

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/mv_result04.tabular
--- a/test-data/mv_result04.tabular Sun Dec 30 01:55:30 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,5 +0,0 @@
-17
-56
-95
-134
-174

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/mv_result05.tabular
--- a/test-data/mv_result05.tabular Sun Dec 30 01:55:30 2018 -0500
+++ b/test-data/mv_result05.tabular Tue May 14 18:11:02 2019 -0400

@@ -1,1 +1,262 @@
-0.4998435882784322
+Predicted
+70.16
+62.06
+83.04
+62.84
+48.63
+51.25
+54.98
+80.3
+42.84
+41.52
+43.83
+73.15
+74.22
+42.88
+74.93
+72.9
+53.74
+78.86
+59.0
+40.28
+54.52
+58.34
+62.74
+62.35
+49.15
+41.92
+65.59
+59.91
+66.49
+72.08
+60.44
+53.84
+54.82
+52.66
+42.37
+61.3
+63.14
+50.62
+42.75
+47.39
+67.8
+73.58
+49.97
+67.04
+67.45
+54.67
+64.87
+77.23
+73.52
+53.55
+70.53
+77.98
+61.99
+53.08
+78.12
+66.55
+63.95
+60.57
+61.6
+60.37
+55.29
+54.31
+52.54
+65.31
+61.51
+57.3
+60.02
+43.64
+74.78
+68.26
+42.72
+61.26
+61.25
+71.58
+61.03
+70.53
+70.25
+43.4
+71.39
+72.31
+72.7
+72.11
+53.55
+43.4
+80.6
+73.72
+58.86
+76.71
+68.36
+60.26
+48.56
+38.96
+69.67
+52.9
+67.63
+75.12
+70.92
+70.89
+67.05
+43.89
+59.94
+62.98
+71.1
+79.22
+77.31
+79.06
+61.11
+66.32
+54.7
+61.1
+54.59
+58.7
+59.6
+73.79
+72.69
+81.83
+61.08
+69.21
+74.8
+54.37
+50.85
+53.07
+58.53
+55.44
+72.62
+54.14
+68.12
+48.81
+50.11
+56.06
+73.63
+63.29
+71.0
+74.87
+81.24
+54.67
+66.96
+61.37
+74.84
+76.71
+69.27
+56.53
+71.91
+58.74
+77.83
+64.57
+51.93
+42.84
+64.11
+59.47
+42.46
+43.79
+51.75
+63.98
+54.71
+64.95
+79.72
+72.12
+60.66
+79.3
+71.26
+59.9
+74.25
+59.68
+52.37
+78.52
+58.52
+71.98
+71.77
+54.48
+48.96
+81.42
+54.08
+53.52
+64.38
+70.79
+63.95
+67.48
+61.76
+66.15
+62.1
+75.68
+69.72
+43.8
+56.27
+53.38
+81.31
+57.54
+48.15
+59.47
+78.01
+56.39
+72.33
+78.8
+78.66
+52.01
+66.68
+48.56
+47.75
+65.67
+77.93
+72.68
+58.0
+77.83
+73.37
+65.39
+69.79
+55.98
+46.35
+54.31
+55.58
+79.69
+52.76
+62.62
+66.54
+60.29
+62.57
+74.86
+48.05
+65.09
+65.02
+67.84
+41.86
+62.28
+57.05
+43.68
+72.0
+63.04
+54.41
+73.37
+75.11
+42.65
+73.16
+71.68
+58.61
+53.54
+73.33
+72.16
+49.96
+54.78
+64.24
+60.13
+76.46
+61.53
+68.36
+53.1
+71.33
+76.12
+70.86
+61.35
+67.12
+43.25
+80.2
+71.16
+58.63
+52.37
+74.93
+53.34
+76.41
+63.87
+59.97

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/mv_result06.tabular
--- a/test-data/mv_result06.tabular Sun Dec 30 01:55:30 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,5 +0,0 @@
-0.07547169811320754 0.10344827586206896 0.10294117647058823
-0.07547169811320754 0.10344827586206896 0.10294117647058823
-0.07547169811320754 0.10344827586206896 0.10294117647058823
-0.07547169811320754 0.10344827586206896 0.10294117647058823
-0.07547169811320754 0.10344827586206896 0.10294117647058823

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/named_steps.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/named_steps.txt Tue May 14 18:11:02 2019 -0400

@@ -0,0 +1,6 @@
+{'preprocessing_1': SelectKBest(k=10, score_func=<function f_regression at 0x113310ea0>), 'estimator': XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='reg:linear', random_state=10,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1)}
\ No newline at end of file

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/nn_model01

Binary file test-data/nn_model01 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline01

Binary file test-data/pipeline01 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline02

Binary file test-data/pipeline02 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline03

Binary file test-data/pipeline03 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline04

Binary file test-data/pipeline04 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline05

Binary file test-data/pipeline05 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline06

Binary file test-data/pipeline06 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline07

Binary file test-data/pipeline07 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline08

Binary file test-data/pipeline08 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline09

Binary file test-data/pipeline09 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline10

Binary file test-data/pipeline10 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline11

Binary file test-data/pipeline11 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline12

Binary file test-data/pipeline12 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline13

Binary file test-data/pipeline13 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline14

Binary file test-data/pipeline14 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/pipeline15

Binary file test-data/pipeline15 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/ranking_.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ranking_.tabular Tue May 14 18:11:02 2019 -0400

@@ -0,0 +1,18 @@
+ranking_
+17
+7
+4
+5
+2
+1
+9
+6
+8
+3
+10
+15
+14
+11
+13
+12
+16

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/searchCV01

Binary file test-data/searchCV01 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 test-data/searchCV02

Binary file test-data/searchCV02 has changed

diff -r 9d234733ccfd -r abb5a3f256e3 utils.py
--- a/utils.py Sun Dec 30 01:55:30 2018 -0500
+++ b/utils.py Tue May 14 18:11:02 2019 -0400

[

b'@@ -1,80 +1,134 @@\n+import ast\n import json\n+import imblearn\n import numpy as np\n-import os\n import pandas\n import pickle\n import re\n import scipy\n import sklearn\n+import skrebate\n import sys\n import warnings\n import xgboost\n \n+from collections import Counter\n from asteval import Interpreter, make_symbol_table\n-from sklearn import (cluster, compose, decomposition, ensemble, feature_extraction,\n- feature_selection, gaussian_process, kernel_approximation, metrics,\n- model_selection, naive_bayes, neighbors, pipeline, preprocessing,\n- svm, linear_model, tree, discriminant_analysis)\n+from imblearn import under_sampling, over_sampling, combine\n+from imblearn.pipeline import Pipeline as imbPipeline\n+from mlxtend import regressor, classifier\n+from scipy.io import mmread\n+from sklearn import (\n+ cluster, compose, decomposition, ensemble, feature_extraction,\n+ feature_selection, gaussian_process, kernel_approximation, metrics,\n+ model_selection, naive_bayes, neighbors, pipeline, preprocessing,\n+ svm, linear_model, tree, discriminant_analysis)\n+\n+try:\n+ import iraps_classifier\n+except ImportError:\n+ pass\n \n try:\n- import skrebate\n-except ModuleNotFoundError:\n+ import model_validations\n+except ImportError:\n+ pass\n+\n+try:\n+ import feature_selectors\n+except ImportError:\n pass\n \n-\n-N_JOBS = int(os.environ.get(\'GALAXY_SLOTS\', 1))\n+try:\n+ import preprocessors\n+except ImportError:\n+ pass\n \n-try:\n- sk_whitelist\n-except NameError:\n- sk_whitelist = None\n+# handle pickle white list file\n+WL_FILE = __import__(\'os\').path.join(\n+ __import__(\'os\').path.dirname(__file__), \'pk_whitelist.json\')\n+\n+N_JOBS = int(__import__(\'os\').environ.get(\'GALAXY_SLOTS\', 1))\n \n \n-class SafePickler(pickle.Unpickler):\n+class _SafePickler(pickle.Unpickler, object):\n """\n- Used to safely deserialize scikit-learn model objects serialized by cPickle.dump\n+ Used to safely deserialize scikit-learn model objects\n Usage:\n- eg.: 
SafePickler.load(pickled_file_object)\n+ eg.: _SafePickler.load(pickled_file_object)\n """\n- def find_class(self, module, name):\n+ def __init__(self, file):\n+ super(_SafePickler, self).__init__(file)\n+ # load global white list\n+ with open(WL_FILE, \'r\') as f:\n+ self.pk_whitelist = json.load(f)\n \n- # sk_whitelist could be read from tool\n- global sk_whitelist\n- if not sk_whitelist:\n- whitelist_file = os.path.join(os.path.dirname(__file__), \'sk_whitelist.json\')\n- with open(whitelist_file, \'r\') as f:\n- sk_whitelist = json.load(f)\n+ self.bad_names = (\n+ \'and\', \'as\', \'assert\', \'break\', \'class\', \'continue\',\n+ \'def\', \'del\', \'elif\', \'else\', \'except\', \'exec\',\n+ \'finally\', \'for\', \'from\', \'global\', \'if\', \'import\',\n+ \'in\', \'is\', \'lambda\', \'not\', \'or\', \'pass\', \'print\',\n+ \'raise\', \'return\', \'try\', \'system\', \'while\', \'with\',\n+ \'True\', \'False\', \'None\', \'eval\', \'execfile\', \'__import__\',\n+ \'__package__\', \'__subclasses__\', \'__bases__\', \'__globals__\',\n+ \'__code__\', \'__closure__\', \'__func__\', \'__self__\', \'__module__\',\n+ \'__dict__\', \'__class__\', \'__call__\', \'__get__\',\n+ \'__getattribute__\', \'__subclasshook__\', \'__new__\',\n+ \'__init__\', \'func_globals\', \'func_code\', \'func_closure\',\n+ \'im_class\', \'im_func\', \'im_self\', \'gi_code\', \'gi_frame\',\n+ \'__asteval__\', \'f_locals\', \'__mro__\')\n \n- bad_names = (\'and\', \'as\', \'assert\', \'break\', \'class\', \'continue\',\n- \'def\', \'del\', \'elif\', \'else\', \'except\', \'exec\',\n- \'finally\', \'for\', \'from\', \'global\', \'if\', \'import\',\n- \'in\', \'is\', \'lambda\', \'not\', \'or\', \'pass\', \'print\',\n- \'raise\', \'return\', \'try\', \'system\', \'while\', \'with\',\n- \'True\', \'False\', \'None\', \'eval\', \'execfile\', \'__impo'..b'eader_name\', \'all_but_by_header_name\']:\n+ c = groups[\'column_selector_options_g\'][\'col_g\']\n+ else:\n+ c = None\n+ groups = 
read_columns(\n+ infile_g,\n+ c=c,\n+ c_option=column_option,\n+ sep=\'\\t\',\n+ header=header,\n+ parse_dates=True)\n+ groups = groups.ravel()\n \n for k, v in cv_json.items():\n if v == \'\':\n@@ -341,7 +502,12 @@\n if test_size and test_size > 1.0:\n cv_json[\'test_size\'] = int(test_size)\n \n- cv_class = getattr(model_selection, cv)\n+ if cv == \'OrderedKFold\':\n+ cv_class = try_get_attr(\'model_validations\', \'OrderedKFold\')\n+ elif cv == \'RepeatedOrderedKFold\':\n+ cv_class = try_get_attr(\'model_validations\', \'RepeatedOrderedKFold\')\n+ else:\n+ cv_class = getattr(model_selection, cv)\n splitter = cv_class(**cv_json)\n \n return splitter, groups\n@@ -349,6 +515,9 @@\n \n # needed when sklearn < v0.20\n def balanced_accuracy_score(y_true, y_pred):\n+ """Compute balanced accuracy score, which is now available in\n+ scikit-learn from v0.20.0.\n+ """\n C = metrics.confusion_matrix(y_true, y_pred)\n with np.errstate(divide=\'ignore\', invalid=\'ignore\'):\n per_class = np.diag(C) / C.sum(axis=1)\n@@ -360,21 +529,71 @@\n \n \n def get_scoring(scoring_json):\n-\n+ """Return single sklearn scorer class\n+ or multiple scoers in dictionary\n+ """\n if scoring_json[\'primary_scoring\'] == \'default\':\n return None\n \n my_scorers = metrics.SCORERS\n+ my_scorers[\'binarize_auc_scorer\'] =\\\n+ try_get_attr(\'iraps_classifier\', \'binarize_auc_scorer\')\n+ my_scorers[\'binarize_average_precision_scorer\'] =\\\n+ try_get_attr(\'iraps_classifier\', \'binarize_average_precision_scorer\')\n if \'balanced_accuracy\' not in my_scorers:\n- my_scorers[\'balanced_accuracy\'] = metrics.make_scorer(balanced_accuracy_score)\n+ my_scorers[\'balanced_accuracy\'] =\\\n+ metrics.make_scorer(balanced_accuracy_score)\n \n if scoring_json[\'secondary_scoring\'] != \'None\'\\\n- and scoring_json[\'secondary_scoring\'] != scoring_json[\'primary_scoring\']:\n- scoring = {}\n- scoring[\'primary\'] = my_scorers[scoring_json[\'primary_scoring\']]\n+ and 
scoring_json[\'secondary_scoring\'] !=\\\n+ scoring_json[\'primary_scoring\']:\n+ return_scoring = {}\n+ primary_scoring = scoring_json[\'primary_scoring\']\n+ return_scoring[primary_scoring] = my_scorers[primary_scoring]\n for scorer in scoring_json[\'secondary_scoring\'].split(\',\'):\n if scorer != scoring_json[\'primary_scoring\']:\n- scoring[scorer] = my_scorers[scorer]\n- return scoring\n+ return_scoring[scorer] = my_scorers[scorer]\n+ return return_scoring\n \n return my_scorers[scoring_json[\'primary_scoring\']]\n+\n+\n+def get_search_params(estimator):\n+ """Format the output of `estimator.get_params()`\n+ """\n+ params = estimator.get_params()\n+ results = []\n+ for k, v in params.items():\n+ # params below won\'t be shown for search in the searchcv tool\n+ keywords = (\'n_jobs\', \'pre_dispatch\', \'memory\', \'steps\',\n+ \'nthread\', \'verbose\')\n+ if k.endswith(keywords):\n+ results.append([\'*\', k, k+": "+repr(v)])\n+ else:\n+ results.append([\'@\', k, k+": "+repr(v)])\n+ results.append(\n+ ["", "Note:",\n+ "@, params eligible for search in searchcv tool."])\n+\n+ return results\n+\n+\n+def try_get_attr(module, name):\n+ """try to get attribute from a custom module\n+\n+ Parameters\n+ ----------\n+ module : str\n+ Module name\n+ name : str\n+ Attribute (class/function) name.\n+\n+ Returns\n+ -------\n+ class or function\n+ """\n+ mod = sys.modules.get(module, None)\n+ if mod:\n+ return getattr(mod, name)\n+ else:\n+ raise Exception("No module named %s." % module)\n'