sklearn_feature_selection: diff utils.py @ 18:ec25331946b8 (draft)

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7

author:   bgruening
date:     Tue, 14 May 2019 18:17:57 -0400
parents:  2bbbac61e48d
children: (none)
--- a/utils.py	Sun Dec 30 01:57:11 2018 -0500
+++ b/utils.py	Tue May 14 18:17:57 2019 -0400
@@ -1,80 +1,134 @@
+import ast
 import json
+import imblearn
 import numpy as np
-import os
 import pandas
 import pickle
 import re
 import scipy
 import sklearn
+import skrebate
 import sys
 import warnings
 import xgboost
+from collections import Counter
 from asteval import Interpreter, make_symbol_table
-from sklearn import (cluster, compose, decomposition, ensemble, feature_extraction,
-                     feature_selection, gaussian_process, kernel_approximation, metrics,
-                     model_selection, naive_bayes, neighbors, pipeline, preprocessing,
-                     svm, linear_model, tree, discriminant_analysis)
+from imblearn import under_sampling, over_sampling, combine
+from imblearn.pipeline import Pipeline as imbPipeline
+from mlxtend import regressor, classifier
+from scipy.io import mmread
+from sklearn import (
+    cluster, compose, decomposition, ensemble, feature_extraction,
+    feature_selection, gaussian_process, kernel_approximation, metrics,
+    model_selection, naive_bayes, neighbors, pipeline, preprocessing,
+    svm, linear_model, tree, discriminant_analysis)
+
+try:
+    import iraps_classifier
+except ImportError:
+    pass
 
 try:
-    import skrebate
-except ModuleNotFoundError:
+    import model_validations
+except ImportError:
+    pass
+
+try:
+    import feature_selectors
+except ImportError:
     pass
-
-N_JOBS = int(os.environ.get('GALAXY_SLOTS', 1))
 
+try:
+    import preprocessors
+except ImportError:
+    pass
 
-try:
-    sk_whitelist
-except NameError:
-    sk_whitelist = None
+# handle pickle whitelist file
+WL_FILE = __import__('os').path.join(
    __import__('os').path.dirname(__file__), 'pk_whitelist.json')
+
+N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
 
 
-class SafePickler(pickle.Unpickler):
+class _SafePickler(pickle.Unpickler, object):
     """
-    Used to safely deserialize scikit-learn model objects serialized by cPickle.dump
+    Used to safely deserialize scikit-learn model objects
     Usage:
-        eg.: SafePickler.load(pickled_file_object)
+        eg.: _SafePickler.load(pickled_file_object)
     """
-    def find_class(self, module, name):
+    def __init__(self, file):
+        super(_SafePickler, self).__init__(file)
+        # load global whitelist
+        with open(WL_FILE, 'r') as f:
+            self.pk_whitelist = json.load(f)
 
-        # sk_whitelist could be read from tool
-        global sk_whitelist
-        if not sk_whitelist:
-            whitelist_file = os.path.join(os.path.dirname(__file__), 'sk_whitelist.json')
-            with open(whitelist_file, 'r') as f:
-                sk_whitelist = json.load(f)
+        self.bad_names = (
+            'and', 'as', 'assert', 'break', 'class', 'continue',
+            'def', 'del', 'elif', 'else', 'except', 'exec',
+            'finally', 'for', 'from', 'global', 'if', 'import',
+            'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
+            'raise', 'return', 'try', 'system', 'while', 'with',
+            'True', 'False', 'None', 'eval', 'execfile', '__import__',
+            '__package__', '__subclasses__', '__bases__', '__globals__',
+            '__code__', '__closure__', '__func__', '__self__', '__module__',
+            '__dict__', '__class__', '__call__', '__get__',
+            '__getattribute__', '__subclasshook__', '__new__',
+            '__init__', 'func_globals', 'func_code', 'func_closure',
+            'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame',
+            '__asteval__', 'f_locals', '__mro__')
 
-        bad_names = ('and', 'as', 'assert', 'break', 'class', 'continue',
-                     'def', 'del', 'elif', 'else', 'except', 'exec',
-                     'finally', 'for', 'from', 'global', 'if', 'import',
-                     'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
-                     'raise', 'return', 'try', 'system', 'while', 'with',
-                     'True', 'False', 'None', 'eval', 'execfile', '__import__',
-                     '__package__', '__subclasses__', '__bases__', '__globals__',
-                     '__code__', '__closure__', '__func__', '__self__', '__module__',
-                     '__dict__', '__class__', '__call__', '__get__',
-                     '__getattribute__', '__subclasshook__', '__new__',
-                     '__init__', 'func_globals', 'func_code', 'func_closure',
-                     'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame',
-                     '__asteval__', 'f_locals', '__mro__')
-        good_names = ['copy_reg._reconstructor', '__builtin__.object']
+        # unclassified good globals
+        self.good_names = [
+            'copy_reg._reconstructor', '__builtin__.object',
+            '__builtin__.bytearray', 'builtins.object',
+            'builtins.bytearray', 'keras.engine.sequential.Sequential',
+            'keras.engine.sequential.Model']
+
+        # custom modules in Galaxy-ML
+        self.custom_modules = [
+            '__main__', 'keras_galaxy_models', 'feature_selectors',
+            'preprocessors', 'iraps_classifier', 'model_validations']
 
+    # override
+    def find_class(self, module, name):
+        # black list first
+        if name in self.bad_names:
+            raise pickle.UnpicklingError("global '%s.%s' is forbidden"
+                                         % (module, name))
+
+        # custom modules in Galaxy-ML
+        if module in self.custom_modules:
+            custom_module = sys.modules.get(module, None)
+            if custom_module:
+                return getattr(custom_module, name)
+            else:
+                raise pickle.UnpicklingError("Module '%s' is not imported"
+                                             % module)
+
+        # For objects from outside libraries, it's necessary to verify
+        # both module and name. Currently only a blacklist checker
+        # is working.
+        # TODO: replace with a whitelist checker.
+        good_names = self.good_names
+        pk_whitelist = self.pk_whitelist
         if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
             fullname = module + '.' + name
             if (fullname in good_names)\
-                or  (   (   module.startswith('sklearn.')
-                        or module.startswith('xgboost.')
-                        or module.startswith('skrebate.')
-                        or module.startswith('imblearn')
-                        or module.startswith('numpy.')
-                        or module == 'numpy'
-                        )
-                    and (name not in bad_names)
-                    ):
-                # TODO: replace with a whitelist checker
-                if fullname not in sk_whitelist['SK_NAMES'] + sk_whitelist['SKR_NAMES'] + sk_whitelist['XGB_NAMES'] + sk_whitelist['NUMPY_NAMES'] + sk_whitelist['IMBLEARN_NAMES'] + good_names:
-                    print("Warning: global %s is not in pickler whitelist yet and will loss support soon. Contact tool author or leave a message at github.com" % fullname)
+                    or (module.startswith(('sklearn.', 'xgboost.',
+                                           'skrebate.', 'imblearn.',
+                                           'mlxtend.', 'numpy.'))
+                        or module == 'numpy'):
+                if fullname not in (pk_whitelist['SK_NAMES'] +
+                                    pk_whitelist['SKR_NAMES'] +
+                                    pk_whitelist['XGB_NAMES'] +
+                                    pk_whitelist['NUMPY_NAMES'] +
+                                    pk_whitelist['IMBLEARN_NAMES'] +
+                                    pk_whitelist['MLXTEND_NAMES'] +
+                                    good_names):
+                    # raise pickle.UnpicklingError
+                    print("Warning: global %s is not in pickler whitelist "
+                          "yet and will lose support soon. Contact tool "
+                          "author or leave a message at github.com" % fullname)
                 mod = sys.modules[module]
                 return getattr(mod, name)
@@ -82,10 +136,15 @@
 
 
 def load_model(file):
-    return SafePickler(file).load()
+    """Load a pickled object with `_SafePickler`
+    """
+    return _SafePickler(file).load()
 
 
-def read_columns(f, c=None, c_option='by_index_number', return_df=False, **args):
+def read_columns(f, c=None, c_option='by_index_number',
+                 return_df=False, **args):
+    """Return an array from a tabular dataset by various column selections
+    """
     data = pandas.read_csv(f, **args)
     if c_option == 'by_index_number':
         cols = list(map(lambda x: x - 1, c))
@@ -106,10 +165,21 @@
     return y
 
 
-## generate an instance for one of sklearn.feature_selection classes
-def feature_selector(inputs):
+def feature_selector(inputs, X=None, y=None):
+    """Generate an instance of a sklearn.feature_selection class
+
+    Parameters
+    ----------
+    inputs : dict
+        From Galaxy tool parameters.
+    X : array
+        Containing training features.
+    y : array or list
+        Target values.
+    """
     selector = inputs['selected_algorithm']
-    selector = getattr(sklearn.feature_selection, selector)
+    if selector != 'DyRFECV':
+        selector = getattr(sklearn.feature_selection, selector)
     options = inputs['options']
 
     if inputs['selected_algorithm'] == 'SelectFromModel':
@@ -128,27 +198,60 @@
         else:
             estimator_json = inputs['model_inputter']['estimator_selector']
             estimator = get_estimator(estimator_json)
+            check_feature_importances = try_get_attr(
+                'feature_selectors', 'check_feature_importances')
+            estimator = check_feature_importances(estimator)
             new_selector = selector(estimator, **options)
 
     elif inputs['selected_algorithm'] == 'RFE':
-        estimator = get_estimator(inputs['estimator_selector'])
         step = options.get('step', None)
         if step and step >= 1.0:
             options['step'] = int(step)
+        estimator = get_estimator(inputs["estimator_selector"])
+        check_feature_importances = try_get_attr(
+            'feature_selectors', 'check_feature_importances')
+        estimator = check_feature_importances(estimator)
         new_selector = selector(estimator, **options)
 
     elif inputs['selected_algorithm'] == 'RFECV':
         options['scoring'] = get_scoring(options['scoring'])
         options['n_jobs'] = N_JOBS
         splitter, groups = get_cv(options.pop('cv_selector'))
-        # TODO support group cv splitters
-        options['cv'] = splitter
+        if groups is None:
+            options['cv'] = splitter
+        else:
+            options['cv'] = list(splitter.split(X, y, groups=groups))
         step = options.get('step', None)
         if step and step >= 1.0:
             options['step'] = int(step)
         estimator = get_estimator(inputs['estimator_selector'])
+        check_feature_importances = try_get_attr(
+            'feature_selectors', 'check_feature_importances')
+        estimator = check_feature_importances(estimator)
         new_selector = selector(estimator, **options)
 
+    elif inputs['selected_algorithm'] == 'DyRFECV':
+        options['scoring'] = get_scoring(options['scoring'])
+        options['n_jobs'] = N_JOBS
+        splitter, groups = get_cv(options.pop('cv_selector'))
+        if groups is None:
+            options['cv'] = splitter
+        else:
+            options['cv'] = list(splitter.split(X, y, groups=groups))
+        step = options.get('step')
+        if not step or step == 'None':
+            step = None
+        else:
+            step = ast.literal_eval(step)
+        options['step'] = step
+        estimator = get_estimator(inputs["estimator_selector"])
+        check_feature_importances = try_get_attr(
+            'feature_selectors', 'check_feature_importances')
+        estimator = check_feature_importances(estimator)
+        DyRFECV = try_get_attr('feature_selectors', 'DyRFECV')
+
+        new_selector = DyRFECV(estimator, **options)
+
     elif inputs['selected_algorithm'] == 'VarianceThreshold':
         new_selector = selector(**options)
 
@@ -161,12 +264,20 @@
 
 
 def get_X_y(params, file1, file2):
-    input_type = params['selected_tasks']['selected_algorithms']['input_options']['selected_input']
+    """Return machine learning inputs X, y from tabular inputs
+    """
+    input_type = (params['selected_tasks']['selected_algorithms']
+                  ['input_options']['selected_input'])
     if input_type == 'tabular':
-        header = 'infer' if params['selected_tasks']['selected_algorithms']['input_options']['header1'] else None
-        column_option = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_1']['selected_column_selector_option']
-        if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
-            c = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_1']['col1']
+        header = 'infer' if (params['selected_tasks']['selected_algorithms']
+                             ['input_options']['header1']) else None
+        column_option = (params['selected_tasks']['selected_algorithms']
+                         ['input_options']['column_selector_options_1']
+                         ['selected_column_selector_option'])
+        if column_option in ['by_index_number', 'all_but_by_index_number',
+                             'by_header_name', 'all_but_by_header_name']:
+            c = (params['selected_tasks']['selected_algorithms']
+                 ['input_options']['column_selector_options_1']['col1'])
         else:
            c = None
         X = read_columns(
@@ -175,15 +286,19 @@
             c_option=column_option,
             sep='\t',
             header=header,
-            parse_dates=True
-        )
+            parse_dates=True).astype(float)
     else:
         X = mmread(file1)
 
-    header = 'infer' if params['selected_tasks']['selected_algorithms']['input_options']['header2'] else None
-    column_option = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_2']['selected_column_selector_option2']
-    if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
-        c = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_2']['col2']
+    header = 'infer' if (params['selected_tasks']['selected_algorithms']
+                         ['input_options']['header2']) else None
+    column_option = (params['selected_tasks']['selected_algorithms']
+                     ['input_options']['column_selector_options_2']
+                     ['selected_column_selector_option2'])
+    if column_option in ['by_index_number', 'all_but_by_index_number',
+                         'by_header_name', 'all_but_by_header_name']:
+        c = (params['selected_tasks']['selected_algorithms']
+             ['input_options']['column_selector_options_2']['col2'])
    else:
         c = None
     y = read_columns(
@@ -192,15 +307,17 @@
         c_option=column_option,
         sep='\t',
         header=header,
-        parse_dates=True
-    )
+        parse_dates=True)
     y = y.ravel()
+
     return X, y
 
 
 class SafeEval(Interpreter):
-
-    def __init__(self, load_scipy=False, load_numpy=False, load_estimators=False):
+    """Customized symbol table for safe literal eval
+    """
+    def __init__(self, load_scipy=False, load_numpy=False,
+                 load_estimators=False):
 
         # File opening and other unneeded functions could be dropped
         unwanted = ['open', 'type', 'dir', 'id', 'str', 'repr']
@@ -208,7 +325,8 @@
         # Allowed symbol table. Add more if needed.
         new_syms = {
             'np_arange': getattr(np, 'arange'),
-            'ensemble_ExtraTreesClassifier': getattr(ensemble, 'ExtraTreesClassifier')
+            'ensemble_ExtraTreesClassifier':
+                getattr(ensemble, 'ExtraTreesClassifier')
         }
 
         syms = make_symbol_table(use_numpy=False, **new_syms)
@@ -216,80 +334,109 @@
         if load_scipy:
             scipy_distributions = scipy.stats.distributions.__dict__
             for k, v in scipy_distributions.items():
-                if isinstance(v, (scipy.stats.rv_continuous, scipy.stats.rv_discrete)):
+                if isinstance(v, (scipy.stats.rv_continuous,
+                                  scipy.stats.rv_discrete)):
                     syms['scipy_stats_' + k] = v
 
         if load_numpy:
-            from_numpy_random = ['beta', 'binomial', 'bytes', 'chisquare', 'choice', 'dirichlet', 'division',
-                                 'exponential', 'f', 'gamma', 'geometric', 'gumbel', 'hypergeometric',
-                                 'laplace', 'logistic', 'lognormal', 'logseries', 'mtrand', 'multinomial',
-                                 'multivariate_normal', 'negative_binomial', 'noncentral_chisquare', 'noncentral_f',
-                                 'normal', 'pareto', 'permutation', 'poisson', 'power', 'rand', 'randint',
-                                 'randn', 'random', 'random_integers', 'random_sample', 'ranf', 'rayleigh',
-                                 'sample', 'seed', 'set_state', 'shuffle', 'standard_cauchy', 'standard_exponential',
-                                 'standard_gamma', 'standard_normal', 'standard_t', 'triangular', 'uniform',
-                                 'vonmises', 'wald', 'weibull', 'zipf']
+            from_numpy_random = [
+                'beta', 'binomial', 'bytes', 'chisquare', 'choice',
+                'dirichlet', 'division', 'exponential', 'f', 'gamma',
+                'geometric', 'gumbel', 'hypergeometric', 'laplace',
+                'logistic', 'lognormal', 'logseries', 'mtrand',
+                'multinomial', 'multivariate_normal', 'negative_binomial',
+                'noncentral_chisquare', 'noncentral_f', 'normal', 'pareto',
+                'permutation', 'poisson', 'power', 'rand', 'randint',
+                'randn', 'random', 'random_integers', 'random_sample',
+                'ranf', 'rayleigh', 'sample', 'seed', 'set_state',
+                'shuffle', 'standard_cauchy', 'standard_exponential',
+                'standard_gamma', 'standard_normal', 'standard_t',
+                'triangular', 'uniform', 'vonmises', 'wald', 'weibull',
+                'zipf']
             for f in from_numpy_random:
                 syms['np_random_' + f] = getattr(np.random, f)
 
         if load_estimators:
             estimator_table = {
-                'sklearn_svm' : getattr(sklearn, 'svm'),
-                'sklearn_tree' : getattr(sklearn, 'tree'),
-                'sklearn_ensemble' : getattr(sklearn, 'ensemble'),
-                'sklearn_neighbors' : getattr(sklearn, 'neighbors'),
-                'sklearn_naive_bayes' : getattr(sklearn, 'naive_bayes'),
-                'sklearn_linear_model' : getattr(sklearn, 'linear_model'),
-                'sklearn_cluster' : getattr(sklearn, 'cluster'),
-                'sklearn_decomposition' : getattr(sklearn, 'decomposition'),
-                'sklearn_preprocessing' : getattr(sklearn, 'preprocessing'),
-                'sklearn_feature_selection' : getattr(sklearn, 'feature_selection'),
-                'sklearn_kernel_approximation' : getattr(sklearn, 'kernel_approximation'),
+                'sklearn_svm': getattr(sklearn, 'svm'),
+                'sklearn_tree': getattr(sklearn, 'tree'),
+                'sklearn_ensemble': getattr(sklearn, 'ensemble'),
+                'sklearn_neighbors': getattr(sklearn, 'neighbors'),
+                'sklearn_naive_bayes': getattr(sklearn, 'naive_bayes'),
+                'sklearn_linear_model': getattr(sklearn, 'linear_model'),
+                'sklearn_cluster': getattr(sklearn, 'cluster'),
+                'sklearn_decomposition': getattr(sklearn, 'decomposition'),
+                'sklearn_preprocessing': getattr(sklearn, 'preprocessing'),
+                'sklearn_feature_selection':
+                    getattr(sklearn, 'feature_selection'),
+                'sklearn_kernel_approximation':
+                    getattr(sklearn, 'kernel_approximation'),
                 'skrebate_ReliefF': getattr(skrebate, 'ReliefF'),
                 'skrebate_SURF': getattr(skrebate, 'SURF'),
                 'skrebate_SURFstar': getattr(skrebate, 'SURFstar'),
                 'skrebate_MultiSURF': getattr(skrebate, 'MultiSURF'),
                 'skrebate_MultiSURFstar': getattr(skrebate, 'MultiSURFstar'),
                 'skrebate_TuRF': getattr(skrebate, 'TuRF'),
-                'xgboost_XGBClassifier' : getattr(xgboost, 'XGBClassifier'),
-                'xgboost_XGBRegressor' : getattr(xgboost, 'XGBRegressor')
+                'xgboost_XGBClassifier': getattr(xgboost, 'XGBClassifier'),
+                'xgboost_XGBRegressor': getattr(xgboost, 'XGBRegressor'),
+                'imblearn_over_sampling': getattr(imblearn, 'over_sampling'),
+                'imblearn_combine': getattr(imblearn, 'combine')
             }
             syms.update(estimator_table)
 
         for key in unwanted:
             syms.pop(key, None)
 
-        super(SafeEval, self).__init__(symtable=syms, use_numpy=False, minimal=False,
-                                       no_if=True, no_for=True, no_while=True, no_try=True,
-                                       no_functiondef=True, no_ifexp=True, no_listcomp=False,
-                                       no_augassign=False, no_assert=True, no_delete=True,
-                                       no_raise=True, no_print=True)
-
+        super(SafeEval, self).__init__(
+            symtable=syms, use_numpy=False, minimal=False,
+            no_if=True, no_for=True, no_while=True, no_try=True,
+            no_functiondef=True, no_ifexp=True, no_listcomp=False,
+            no_augassign=False, no_assert=True, no_delete=True,
+            no_raise=True, no_print=True)
 
 
 def get_estimator(estimator_json):
-
+    """Return a sklearn or compatible estimator from Galaxy tool inputs
+    """
     estimator_module = estimator_json['selected_module']
 
-    if estimator_module == 'customer_estimator':
+    if estimator_module == 'custom_estimator':
         c_estimator = estimator_json['c_estimator']
         with open(c_estimator, 'rb') as model_handler:
             new_model = load_model(model_handler)
         return new_model
 
+    if estimator_module == "binarize_target":
+        wrapped_estimator = estimator_json['wrapped_estimator']
+        with open(wrapped_estimator, 'rb') as model_handler:
+            wrapped_estimator = load_model(model_handler)
+        options = {}
+        if estimator_json['z_score'] is not None:
+            options['z_score'] = estimator_json['z_score']
+        if estimator_json['value'] is not None:
+            options['value'] = estimator_json['value']
+        options['less_is_positive'] = estimator_json['less_is_positive']
+        if estimator_json['clf_or_regr'] == 'BinarizeTargetClassifier':
+            klass = try_get_attr('iraps_classifier',
+                                 'BinarizeTargetClassifier')
+        else:
+            klass = try_get_attr('iraps_classifier',
+                                 'BinarizeTargetRegressor')
+        return klass(wrapped_estimator, **options)
+
     estimator_cls = estimator_json['selected_estimator']
 
     if estimator_module == 'xgboost':
-        cls = getattr(xgboost, estimator_cls)
+        klass = getattr(xgboost, estimator_cls)
     else:
         module = getattr(sklearn, estimator_module)
-        cls = getattr(module, estimator_cls)
+        klass = getattr(module, estimator_cls)
 
-    estimator = cls()
+    estimator = klass()
 
     estimator_params = estimator_json['text_params'].strip()
     if estimator_params != '':
         try:
+            safe_eval = SafeEval()
             params = safe_eval('dict(' + estimator_params + ')')
         except ValueError:
             sys.exit("Unsupported parameter input: `%s`" % estimator_params)
@@ -301,9 +448,13 @@
 
 
 def get_cv(cv_json):
-    """
-    cv_json:
-        e.g.:
+    """Return a CV splitter from Galaxy tool inputs
+
+    Parameters
+    ----------
+    cv_json : dict
+        From Galaxy tool inputs.
+        e.g.:
         {
             'selected_cv': 'StratifiedKFold',
             'n_splits': 3,
@@ -315,15 +466,25 @@
     if cv == 'default':
         return cv_json['n_splits'], None
 
-    groups = cv_json.pop('groups', None)
-    if groups:
-        groups = groups.strip()
-        if groups != '':
-            if groups.startswith('__ob__'):
-                groups = groups[6:]
-            if groups.endswith('__cb__'):
-                groups = groups[:-6]
-            groups = [int(x.strip()) for x in groups.split(',')]
+    groups = cv_json.pop('groups_selector', None)
+    if groups is not None:
+        infile_g = groups['infile_g']
+        header = 'infer' if groups['header_g'] else None
+        column_option = (groups['column_selector_options_g']
+                         ['selected_column_selector_option_g'])
+        if column_option in ['by_index_number', 'all_but_by_index_number',
+                             'by_header_name', 'all_but_by_header_name']:
+            c = groups['column_selector_options_g']['col_g']
+        else:
+            c = None
+        groups = read_columns(
+            infile_g,
+            c=c,
+            c_option=column_option,
+            sep='\t',
+            header=header,
+            parse_dates=True)
+        groups = groups.ravel()
 
     for k, v in cv_json.items():
         if v == '':
@@ -341,7 +502,12 @@
     if test_size and test_size > 1.0:
         cv_json['test_size'] = int(test_size)
 
-    cv_class = getattr(model_selection, cv)
+    if cv == 'OrderedKFold':
+        cv_class = try_get_attr('model_validations', 'OrderedKFold')
+    elif cv == 'RepeatedOrderedKFold':
+        cv_class = try_get_attr('model_validations', 'RepeatedOrderedKFold')
+    else:
+        cv_class = getattr(model_selection, cv)
     splitter = cv_class(**cv_json)
 
     return splitter, groups
@@ -349,6 +515,9 @@
 
 # needed when sklearn < v0.20
 def balanced_accuracy_score(y_true, y_pred):
+    """Compute balanced accuracy score, which is now available in
+    scikit-learn from v0.20.0.
+    """
     C = metrics.confusion_matrix(y_true, y_pred)
     with np.errstate(divide='ignore', invalid='ignore'):
         per_class = np.diag(C) / C.sum(axis=1)
@@ -360,21 +529,71 @@
 
 
 def get_scoring(scoring_json):
-
+    """Return a single sklearn scorer or multiple scorers in a dictionary
+    """
     if scoring_json['primary_scoring'] == 'default':
         return None
 
     my_scorers = metrics.SCORERS
+    my_scorers['binarize_auc_scorer'] =\
+        try_get_attr('iraps_classifier', 'binarize_auc_scorer')
+    my_scorers['binarize_average_precision_scorer'] =\
+        try_get_attr('iraps_classifier', 'binarize_average_precision_scorer')
     if 'balanced_accuracy' not in my_scorers:
-        my_scorers['balanced_accuracy'] = metrics.make_scorer(balanced_accuracy_score)
+        my_scorers['balanced_accuracy'] =\
+            metrics.make_scorer(balanced_accuracy_score)
 
     if scoring_json['secondary_scoring'] != 'None'\
-            and scoring_json['secondary_scoring'] != scoring_json['primary_scoring']:
-        scoring = {}
-        scoring['primary'] = my_scorers[scoring_json['primary_scoring']]
+            and scoring_json['secondary_scoring'] !=\
+            scoring_json['primary_scoring']:
+        return_scoring = {}
+        primary_scoring = scoring_json['primary_scoring']
+        return_scoring[primary_scoring] = my_scorers[primary_scoring]
         for scorer in scoring_json['secondary_scoring'].split(','):
             if scorer != scoring_json['primary_scoring']:
-                scoring[scorer] = my_scorers[scorer]
-        return scoring
+                return_scoring[scorer] = my_scorers[scorer]
+        return return_scoring
 
     return my_scorers[scoring_json['primary_scoring']]
+
+
+def get_search_params(estimator):
+    """Format the output of `estimator.get_params()`
+    """
+    params = estimator.get_params()
+    results = []
+    for k, v in params.items():
+        # params below won't be shown for search in the searchcv tool
+        keywords = ('n_jobs', 'pre_dispatch', 'memory', 'steps',
+                    'nthread', 'verbose')
+        if k.endswith(keywords):
+            results.append(['*', k, k + ": " + repr(v)])
+        else:
+            results.append(['@', k, k + ": " + repr(v)])
+
+    results.append(
+        ["", "Note:",
+         "@, params eligible for search in searchcv tool."])
+
+    return results
+
+
+def try_get_attr(module, name):
+    """Try to get an attribute from a custom module
+
+    Parameters
+    ----------
+    module : str
+        Module name.
+    name : str
+        Attribute (class/function) name.
+
+    Returns
+    -------
    class or function
+    """
+    mod = sys.modules.get(module, None)
+    if mod:
+        return getattr(mod, name)
+    else:
+        raise Exception("No module named %s." % module)
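
A few usage sketches for the revised helpers follow. None of this code is part of the commit; it assumes this file and its dependencies are importable as `utils`, and file names such as `model.pk` are made up. First, round-tripping an estimator through the new whitelist unpickler, which also requires `pk_whitelist.json` to sit next to utils.py:

    import pickle

    from sklearn.linear_model import LogisticRegression

    from utils import load_model

    # serialize an estimator the ordinary way ...
    with open('model.pk', 'wb') as fh:
        pickle.dump(LogisticRegression(), fh)

    # ... and deserialize it through _SafePickler, which raises
    # pickle.UnpicklingError on blacklisted globals and prints a warning
    # for globals missing from pk_whitelist.json
    with open('model.pk', 'rb') as fh:
        estimator = load_model(fh)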
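
`read_columns` forwards its extra keyword arguments to `pandas.read_csv`, and `by_index_number` selections are 1-based because of the `x - 1` shift inside the function. A sketch, with `data.tsv` standing in for a real tabular dataset:

    from utils import read_columns

    # select the 1st and 3rd columns of a tab-separated file
    X = read_columns('data.tsv', c=[1, 3], c_option='by_index_number',
                     sep='\t', header=0, parse_dates=True)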
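
The shape of the `inputs` dict that `feature_selector` expects is defined by the Galaxy tool XML; only the `selected_algorithm` and `options` keys are visible in this diff, so the sketch below uses the simplest branch (`VarianceThreshold`) with an illustrative option value:

    from utils import feature_selector

    inputs = {
        'selected_algorithm': 'VarianceThreshold',
        'options': {'threshold': 0.0},  # assumed option, passed as **options
    }
    selector = feature_selector(inputs)  # a sklearn VarianceThreshold instance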
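
`get_estimator` parses the free-text hyperparameters by wrapping them in `dict(...)` and evaluating the string with `SafeEval`, an asteval `Interpreter` whose symbol table exposes only whitelisted names. A sketch:

    from utils import SafeEval

    safe_eval = SafeEval()
    params = safe_eval("dict(C=10.0, kernel='rbf')")
    # -> {'C': 10.0, 'kernel': 'rbf'}, ready for estimator.set_params(**params)

    # with load_numpy=True the prefixed numpy.random names are exposed too
    safe_eval = SafeEval(load_numpy=True)
    draws = safe_eval('np_random_randint(1, 11, 5)')  # five ints in [1, 10]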
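
The `RFECV` and `DyRFECV` branches materialize group-aware splits because `RFECV` forwards only `X` and `y` to its `cv` object, so any splitter that needs `groups` must first be expanded into explicit `(train, test)` index pairs. A pure-sklearn illustration of the same trick:

    import numpy as np
    from sklearn.model_selection import GroupKFold

    X = np.random.rand(12, 4)
    y = np.random.randint(2, size=12)
    groups = np.repeat([0, 1, 2, 3], 3)  # four groups of three samples

    # list(...) freezes the group-aware splits into plain index pairs,
    # which any sklearn API accepting `cv` will take as-is
    cv = list(GroupKFold(n_splits=4).split(X, y, groups=groups))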
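
The `balanced_accuracy_score` backport is macro-averaged recall: the diagonal of the confusion matrix divided by the row sums, averaged over the classes present in `y_true`. A worked example:

    from utils import balanced_accuracy_score

    # class 0: 1 of 2 correct (recall 0.5); class 1: 3 of 3 (recall 1.0)
    score = balanced_accuracy_score([0, 0, 1, 1, 1], [0, 1, 1, 1, 1])
    # (0.5 + 1.0) / 2 = 0.75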
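
`get_search_params` emits one `[flag, key, "key: value"]` row per parameter: `@` marks parameters the searchcv tool may tune, while `*` marks bookkeeping parameters (`n_jobs`, `verbose`, ...) that are excluded from the search. A sketch:

    from sklearn.svm import SVC

    from utils import get_search_params

    for flag, key, rep in get_search_params(SVC()):
        print(flag, rep)
    # e.g.
    #   @ C: 1.0
    #   @ kernel: 'rbf'
    #   * verbose: False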