Previous changeset 2:9ddacd0b8c8b (2018-08-07) Next changeset 4:86450dde8682 (2018-08-23) |
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7 |
modified:
main_macros.xml pipeline.xml |
added:
test-data/pipeline09 test-data/pipeline10 utils.py |
b |
diff -r 9ddacd0b8c8b -r 0857964e07c2 main_macros.xml --- a/main_macros.xml Tue Aug 07 05:45:45 2018 -0400 +++ b/main_macros.xml Fri Aug 17 12:26:40 2018 -0400 |
[ |
b'@@ -1,216 +1,13 @@\n <macros>\n <token name="@VERSION@">0.9</token>\n \n- <token name="@COLUMNS_FUNCTION@">\n-def read_columns(f, c=None, c_option=\'by_index_number\', return_df=False, **args):\n- data = pandas.read_csv(f, **args)\n- if c_option == \'by_index_number\':\n- cols = list(map(lambda x: x - 1, c))\n- data = data.iloc[:,cols]\n- if c_option == \'all_but_by_index_number\':\n- cols = list(map(lambda x: x - 1, c))\n- data.drop(data.columns[cols], axis=1, inplace=True)\n- if c_option == \'by_header_name\':\n- cols = [e.strip() for e in c.split(\',\')]\n- data = data[cols]\n- if c_option == \'all_but_by_header_name\':\n- cols = [e.strip() for e in c.split(\',\')]\n- data.drop(cols, axis=1, inplace=True)\n- y = data.values\n- if return_df:\n- return y, data\n- else:\n- return y\n- return y\n- </token>\n-\n-## generate an instance for one of sklearn.feature_selection classes\n- <token name="@FEATURE_SELECTOR_FUNCTION@">\n-def feature_selector(inputs):\n- selector = inputs["selected_algorithm"]\n- selector = getattr(sklearn.feature_selection, selector)\n- options = inputs["options"]\n-\n- if inputs[\'selected_algorithm\'] == \'SelectFromModel\':\n- if not options[\'threshold\'] or options[\'threshold\'] == \'None\':\n- options[\'threshold\'] = None\n- if inputs[\'model_inputter\'][\'input_mode\'] == \'prefitted\':\n- model_file = inputs[\'model_inputter\'][\'fitted_estimator\']\n- with open(model_file, \'rb\') as model_handler:\n- fitted_estimator = pickle.load(model_handler)\n- new_selector = selector(fitted_estimator, prefit=True, **options)\n- else:\n- estimator_json = inputs[\'model_inputter\']["estimator_selector"]\n- estimator = get_estimator(estimator_json)\n- new_selector = selector(estimator, **options)\n-\n- elif inputs[\'selected_algorithm\'] in [\'RFE\', \'RFECV\']:\n- if \'scoring\' in options and (not options[\'scoring\'] or options[\'scoring\'] == \'None\'):\n- options[\'scoring\'] = None\n- estimator=get_estimator(inputs["estimator_selector"])\n- new_selector = selector(estimator, **options)\n-\n- elif inputs[\'selected_algorithm\'] == "VarianceThreshold":\n- new_selector = selector(**options)\n-\n- else:\n- score_func = inputs["score_func"]\n- score_func = getattr(sklearn.feature_selection, score_func)\n- new_selector = selector(score_func, **options)\n-\n- return new_selector\n- </token>\n-\n- <token name="@GET_X_y_FUNCTION@">\n-def get_X_y(params, file1, file2):\n- input_type = params["selected_tasks"]["selected_algorithms"]["input_options"]["selected_input"]\n- if input_type=="tabular":\n- header = \'infer\' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header1"] else None\n- column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["selected_column_selector_option"]\n- if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:\n- c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["col1"]\n- else:\n- c = None\n- X = read_columns(\n- file1,\n- c = c,\n- c_option = column_option,\n- sep=\'\\t\',\n- header=header,\n- parse_dates=True\n- )\n- else:\n- X = mmread(file1)\n-\n- header = \'infer\' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None\n- column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]\n- if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:\n- c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["col2"]\n- else:\n- c = None\n- y = read_columns(\n- file2,\n- c = c,\n- c_option = column_option,\n- sep=\'\\t\',\n- header=header,\n- parse_dates=True\n- )\n- y=y.ravel()\n- return X, y\n- </token>\n-\n- <token name="@SAFE_EVAL_FUNCTION@">\n-de'..b'copy=True, iterated_power=\'auto\', n_components=None, random_state=None, svd_solver=\'auto\', tol=0.0, whiten=False."/>\n </when>\n <when value="SparsePCA">\n- <expand macro="estimator_params_text" label="Type in maxtrix decomposition parameters:"\n- help="Parameters in dictionary without braces (\'{}\'), e.g., \'n_components\': 100, \'random_state\': 42. No double quotes. Leave this box blank for class default."/>\n- </when>\n- <when value="SparseCoder">\n- <expand macro="estimator_params_text" label="Type in maxtrix decomposition parameters:"\n- help="Parameters in dictionary without braces (\'{}\'), e.g., \'transform_algorithm\': \'omp\', \'transform_alpha\': 1.0. No double quotes. Leave this box blank for class default."/>\n+ <expand macro="estimator_params_text"\n+ help="Default(=blank): U_init=None, V_init=None, alpha=1, max_iter=1000, method=\'lars\', n_components=None, random_state=None, ridge_alpha=0.01, tol=1e-08, verbose=False."/>\n </when>\n <when value="TruncatedSVD">\n- <expand macro="estimator_params_text" label="Type in maxtrix decomposition parameters:"\n- help="Parameters in dictionary without braces (\'{}\'), e.g., \'n_components\': 2, \'algorithm\': \'randomized\'. No double quotes. Leave this box blank for default estimator."/>\n+ <expand macro="estimator_params_text"\n+ help="Default(=blank): algorithm=\'randomized\', n_components=2, n_iter=5, random_state=None, tol=0.0."/>\n </when>\n </conditional>\n </xml>\n@@ -1470,8 +1359,45 @@\n <option value="FeatureAgglomeration" selected="true">FeatureAgglomeration</option>\n </param>\n <when value="FeatureAgglomeration">\n- <expand macro="estimator_params_text" label="Type in parameters:"\n- help="Parameters in dictionary without braces (\'{}\'), e.g., \'n_clusters\': 2, \'affinity\': \'euclidean\'. No double quotes. Leave this box blank for class default."/>\n+ <expand macro="estimator_params_text"\n+ help="Default(=blank): affinity=\'euclidean\', compute_full_tree=\'auto\', connectivity=None, linkage=\'ward\', memory=None, n_clusters=2, pooling_func=np.mean."/>\n+ </when>\n+ </conditional>\n+ </xml>\n+\n+ <xml name="skrebate">\n+ <conditional name="skrebate_selector">\n+ <param name="select_algorithm" type="select" label="Choose the algorithm:">\n+ <option value="ReliefF">ReliefF</option>\n+ <option value="SURF">SURF</option>\n+ <option value="SURFstar">SURFstar</option>\n+ <option value="MultiSURF">MultiSURF</option>\n+ <option value="MultiSURFstar">MultiSURFstar</option>\n+ <option value="TuRF">TuRF</option>\n+ </param>\n+ <when value="ReliefF">\n+ <expand macro="estimator_params_text"\n+ help="Default(=blank): discrete_threshold=10, n_features_to_select=10, n_neighbors=100, verbose=False."/>\n+ </when>\n+ <when value="SURF">\n+ <expand macro="estimator_params_text"\n+ help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False."/>\n+ </when>\n+ <when value="SURFstar">\n+ <expand macro="estimator_params_text"\n+ help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False."/>\n+ </when>\n+ <when value="MultiSURF">\n+ <expand macro="estimator_params_text"\n+ help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False."/>\n+ </when>\n+ <when value="MultiSURFstar">\n+ <expand macro="estimator_params_text"\n+ help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False."/>\n+ </when>\n+ <when value="TuRF">\n+ <expand macro="estimator_params_text"\n+ help="Default(=blank): core_algorithm=\'ReliefF\', discrete_threshold=10, n_features_to_select=10, n_neighbors=100, pct=0.5, verbose=False."/>\n </when>\n </conditional>\n </xml>\n' |
b |
diff -r 9ddacd0b8c8b -r 0857964e07c2 pipeline.xml --- a/pipeline.xml Tue Aug 07 05:45:45 2018 -0400 +++ b/pipeline.xml Fri Aug 17 12:26:40 2018 -0400 |
[ |
b'@@ -3,7 +3,9 @@\n <macros>\n <import>main_macros.xml</import>\n </macros>\n- <expand macro="python_requirements"/>\n+ <expand macro="python_requirements">\n+ <requirement type="package" version="0.6">skrebate</requirement>\n+ </expand>\n <expand macro="macro_stdio"/>\n <version_command>echo "@VERSION@"</version_command>\n <command>\n@@ -16,18 +18,18 @@\n <configfile name="sklearn_pipeline_script">\n <![CDATA[\n import sys\n+import os\n import json\n import pickle\n import pprint\n-import xgboost\n-import ast\n-import sklearn.feature_selection\n-from sklearn import ( preprocessing, svm, linear_model, ensemble, naive_bayes,\n+import skrebate\n+from sklearn import (preprocessing, svm, linear_model, ensemble, naive_bayes,\n tree, neighbors, decomposition, kernel_approximation, cluster)\n from sklearn.pipeline import Pipeline\n \n-@GET_ESTIMATOR_FUNCTION@\n-@FEATURE_SELECTOR_FUNCTION@\n+execfile("$__tool_directory__/utils.py")\n+\n+safe_eval = SafeEval()\n \n input_json_path = sys.argv[1]\n with open(input_json_path, "r") as param_handler:\n@@ -45,34 +47,44 @@\n preprocessor = input_json["pre_processors"]["selected_pre_processor"]\n pre_processor_options = input_json["pre_processors"]["options"]\n my_class = getattr(preprocessing, preprocessor)\n- return my_class(**pre_processor_options)\n- if input_json[\'component_type\'] == \'feature_selection\':\n- fs_obj = feature_selector(input_json[\'fs_algorithm_selector\'])\n- return fs_obj\n- if input_json[\'component_type\'] == \'decomposition\':\n+ obj = my_class(**pre_processor_options)\n+ elif input_json[\'component_type\'] == \'feature_selection\':\n+ obj = feature_selector(input_json[\'fs_algorithm_selector\'])\n+ elif input_json[\'component_type\'] == \'decomposition\':\n algorithm = input_json[\'matrix_decomposition_selector\'][\'select_algorithm\']\n obj = getattr(decomposition, algorithm)()\n options = input_json[\'matrix_decomposition_selector\'][\'text_params\'].strip()\n if options != "":\n- options = ast.literal_eval(\'{\' + options + \'}\')\n+ options = safe_eval(\'dict(\' + options + \')\')\n obj.set_params(**options)\n- return obj\n- if input_json[\'component_type\'] == \'kernel_approximation\':\n+ elif input_json[\'component_type\'] == \'kernel_approximation\':\n algorithm = input_json[\'kernel_approximation_selector\'][\'select_algorithm\']\n obj = getattr(kernel_approximation, algorithm)()\n options = input_json[\'kernel_approximation_selector\'][\'text_params\'].strip()\n if options != "":\n- options = ast.literal_eval(\'{\' + options + \'}\')\n+ options = safe_eval(\'dict(\' + options + \')\')\n obj.set_params(**options)\n- return obj\n- if input_json[\'component_type\'] == \'FeatureAgglomeration\':\n+ elif input_json[\'component_type\'] == \'FeatureAgglomeration\':\n algorithm = input_json[\'FeatureAgglomeration_selector\'][\'select_algorithm\']\n obj = getattr(cluster, algorithm)()\n options = input_json[\'FeatureAgglomeration_selector\'][\'text_params\'].strip()\n if options != "":\n- options = ast.literal_eval(\'{\' + options + \'}\')\n+ options = safe_eval(\'dict(\' + options + \')\')\n obj.set_params(**options)\n- return obj\n+ elif input_json[\'component_type\'] == \'skrebate\':\n+ algorithm = input_json[\'skrebate_selector\'][\'select_algorithm\']\n+ if algorithm == \'TuRF\':\n+ obj = getattr(skrebate, algorithm)(core_algorithm=\'ReliefF\')\n+ else:\n+ obj = getattr(skrebate, algorithm)()\n+ options = input_json[\'skrebate_selector\'][\'text_params\'].strip()\n+ if options != "":\n+ options = safe_eval(\'dict(\' + options + \')\')\n+ obj.set_params(**options)\n+ if \'n_jobs\' in obj.get_params():\n+ obj.set_params( n_jobs=N_JOBS )\n+ return obj\n+\n if len(params[\'pipeline_compon'..b'ne05" compare="sim_size" delta="1"/>\n </test>\n <test>\n@@ -228,7 +244,7 @@\n <param name="component_type" value="kernel_approximation"/>\n <conditional name="kernel_approximation_selector">\n <param name="select_algorithm" value="RBFSampler"/>\n- <param name="text_params" value="\'n_components\': 10, \'gamma\': 2.0"/>\n+ <param name="text_params" value="n_components=10, gamma=2.0"/>\n </conditional>\n </conditional>\n <param name="selected_module" value="ensemble"/>\n@@ -240,13 +256,37 @@\n <param name="component_type" value="FeatureAgglomeration"/>\n <conditional name="FeatureAgglomeration_selector">\n <param name="select_algorithm" value="FeatureAgglomeration"/>\n- <param name="text_params" value="\'n_clusters\': 3, \'affinity\': \'euclidean\'"/>\n+ <param name="text_params" value="n_clusters=3, affinity=\'euclidean\'"/>\n </conditional>\n </conditional>\n <param name="selected_module" value="ensemble"/>\n <param name="selected_estimator" value="AdaBoostClassifier"/>\n <output name="outfile" file="pipeline08" compare="sim_size" delta="1"/>\n </test>\n+ <test>\n+ <conditional name="component_selector">\n+ <param name="component_type" value="skrebate"/>\n+ <conditional name="skrebate_selector">\n+ <param name="select_algorithm" value="ReliefF"/>\n+ <param name="text_params" value="n_features_to_select=3, n_neighbors=100"/>\n+ </conditional>\n+ </conditional>\n+ <param name="selected_module" value="ensemble"/>\n+ <param name="selected_estimator" value="RandomForestRegressor"/>\n+ <output name="outfile" file="pipeline09" compare="sim_size" delta="1"/>\n+ </test>\n+ <test>\n+ <conditional name="component_selector">\n+ <param name="component_type" value="skrebate"/>\n+ <conditional name="skrebate_selector">\n+ <param name="select_algorithm" value="TuRF"/>\n+ <param name="text_params" value=""/>\n+ </conditional>\n+ </conditional>\n+ <param name="selected_module" value="ensemble"/>\n+ <param name="selected_estimator" value="RandomForestRegressor"/>\n+ <output name="outfile" file="pipeline10" compare="sim_size" delta="1"/>\n+ </test>\n </tests>\n <help>\n <![CDATA[\n@@ -255,7 +295,7 @@\n that can be cross-validated together while setting different parameters.\n please refer to `Scikit-learn pipeline Pipeline`_.\n \n-**Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_ and/or `cluster.FeatureAgglomeration`_.\n+**Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_ and/or `skrebate`_.\n \n **Estimator** selector supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_ and `neighbors`_.\n \n@@ -274,6 +314,7 @@\n .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition\n .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation\n .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html\n+.. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/\n \n ]]>\n </help>\n' |
b |
diff -r 9ddacd0b8c8b -r 0857964e07c2 test-data/pipeline09 |
b |
Binary file test-data/pipeline09 has changed |
b |
diff -r 9ddacd0b8c8b -r 0857964e07c2 test-data/pipeline10 |
b |
Binary file test-data/pipeline10 has changed |
b |
diff -r 9ddacd0b8c8b -r 0857964e07c2 utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils.py Fri Aug 17 12:26:40 2018 -0400 |
[ |
b'@@ -0,0 +1,251 @@\n+import sys\n+import os\n+import pandas\n+import re\n+import pickle\n+import warnings\n+import numpy as np\n+import xgboost\n+import scipy\n+import sklearn\n+import ast\n+from asteval import Interpreter, make_symbol_table\n+from sklearn import metrics, model_selection, ensemble, svm, linear_model, naive_bayes, tree, neighbors\n+\n+N_JOBS = int( os.environ.get(\'GALAXY_SLOTS\', 1) )\n+\n+def read_columns(f, c=None, c_option=\'by_index_number\', return_df=False, **args):\n+ data = pandas.read_csv(f, **args)\n+ if c_option == \'by_index_number\':\n+ cols = list(map(lambda x: x - 1, c))\n+ data = data.iloc[:,cols]\n+ if c_option == \'all_but_by_index_number\':\n+ cols = list(map(lambda x: x - 1, c))\n+ data.drop(data.columns[cols], axis=1, inplace=True)\n+ if c_option == \'by_header_name\':\n+ cols = [e.strip() for e in c.split(\',\')]\n+ data = data[cols]\n+ if c_option == \'all_but_by_header_name\':\n+ cols = [e.strip() for e in c.split(\',\')]\n+ data.drop(cols, axis=1, inplace=True)\n+ y = data.values\n+ if return_df:\n+ return y, data\n+ else:\n+ return y\n+ return y\n+\n+\n+## generate an instance for one of sklearn.feature_selection classes\n+def feature_selector(inputs):\n+ selector = inputs["selected_algorithm"]\n+ selector = getattr(sklearn.feature_selection, selector)\n+ options = inputs["options"]\n+\n+ if inputs[\'selected_algorithm\'] == \'SelectFromModel\':\n+ if not options[\'threshold\'] or options[\'threshold\'] == \'None\':\n+ options[\'threshold\'] = None\n+ if inputs[\'model_inputter\'][\'input_mode\'] == \'prefitted\':\n+ model_file = inputs[\'model_inputter\'][\'fitted_estimator\']\n+ with open(model_file, \'rb\') as model_handler:\n+ fitted_estimator = pickle.load(model_handler)\n+ new_selector = selector(fitted_estimator, prefit=True, **options)\n+ else:\n+ estimator_json = inputs[\'model_inputter\']["estimator_selector"]\n+ estimator = get_estimator(estimator_json)\n+ new_selector = selector(estimator, **options)\n+\n+ elif inputs[\'selected_algorithm\'] == \'RFE\':\n+ estimator=get_estimator(inputs["estimator_selector"])\n+ new_selector = selector(estimator, **options)\n+\n+ elif inputs[\'selected_algorithm\'] == \'RFECV\':\n+ options[\'scoring\'] = get_scoring(options[\'scoring\'])\n+ options[\'n_jobs\'] = N_JOBS\n+ options[\'cv\'] = get_cv( options[\'cv\'].strip() )\n+ estimator=get_estimator(inputs["estimator_selector"])\n+ new_selector = selector(estimator, **options)\n+\n+ elif inputs[\'selected_algorithm\'] == "VarianceThreshold":\n+ new_selector = selector(**options)\n+\n+ else:\n+ score_func = inputs["score_func"]\n+ score_func = getattr(sklearn.feature_selection, score_func)\n+ new_selector = selector(score_func, **options)\n+\n+ return new_selector\n+ \n+\n+def get_X_y(params, file1, file2):\n+ input_type = params["selected_tasks"]["selected_algorithms"]["input_options"]["selected_input"]\n+ if input_type=="tabular":\n+ header = \'infer\' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header1"] else None\n+ column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["selected_column_selector_option"]\n+ if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:\n+ c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["col1"]\n+ else:\n+ c = None\n+ X = read_columns(\n+ file1,\n+ c = c,\n+ c_option = column_option,\n+ sep=\'\\t\',\n+ header=header,\n+ parse_dates=True\n+ )\n+ else:\n+ X = mmread(file1)\n+\n+ header = \'infer\' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None\n+ co'..b' \'vonmises\', \'wald\', \'weibull\', \'zipf\' ]\n+ for f in from_numpy_random:\n+ syms[\'np_random_\' + f] = getattr(np.random, f)\n+\n+ for key in unwanted:\n+ syms.pop(key, None)\n+\n+ super(SafeEval, self).__init__( symtable=syms, use_numpy=False, minimal=False,\n+ no_if=True, no_for=True, no_while=True, no_try=True,\n+ no_functiondef=True, no_ifexp=True, no_listcomp=False,\n+ no_augassign=False, no_assert=True, no_delete=True,\n+ no_raise=True, no_print=True)\n+\n+\n+def get_search_params(params_builder):\n+ search_params = {}\n+ safe_eval = SafeEval(load_scipy=True, load_numpy=True)\n+\n+ for p in params_builder[\'param_set\']:\n+ search_p = p[\'search_param_selector\'][\'search_p\']\n+ if search_p.strip() == \'\':\n+ continue\n+ param_type = p[\'search_param_selector\'][\'selected_param_type\']\n+\n+ lst = search_p.split(":")\n+ assert (len(lst) == 2), "Error, make sure there is one and only one colon in search parameter input."\n+ literal = lst[1].strip()\n+ ev = safe_eval(literal)\n+ if param_type == "final_estimator_p":\n+ search_params["estimator__" + lst[0].strip()] = ev\n+ else:\n+ search_params["preprocessing_" + param_type[5:6] + "__" + lst[0].strip()] = ev\n+\n+ return search_params\n+\n+\n+def get_estimator(estimator_json):\n+ estimator_module = estimator_json[\'selected_module\']\n+ estimator_cls = estimator_json[\'selected_estimator\']\n+\n+ if estimator_module == "xgboost":\n+ cls = getattr(xgboost, estimator_cls)\n+ else:\n+ module = getattr(sklearn, estimator_module)\n+ cls = getattr(module, estimator_cls)\n+\n+ estimator = cls()\n+\n+ estimator_params = estimator_json[\'text_params\'].strip()\n+ if estimator_params != "":\n+ try:\n+ params = safe_eval(\'dict(\' + estimator_params + \')\')\n+ except ValueError:\n+ sys.exit("Unsupported parameter input: `%s`" %estimator_params)\n+ estimator.set_params(**params)\n+ if \'n_jobs\' in estimator.get_params():\n+ estimator.set_params( n_jobs=N_JOBS )\n+\n+ return estimator\n+\n+\n+def get_cv(literal):\n+ safe_eval = SafeEval()\n+ if literal == "":\n+ return None\n+ if literal.isdigit():\n+ return int(literal)\n+ m = re.match(r\'^(?P<method>\\w+)\\((?P<args>.*)\\)$\', literal)\n+ if m:\n+ my_class = getattr( model_selection, m.group(\'method\') )\n+ args = safe_eval( \'dict(\'+ m.group(\'args\') + \')\' )\n+ return my_class( **args )\n+ sys.exit("Unsupported CV input: %s" %literal)\n+\n+\n+def get_scoring(scoring_json):\n+ def balanced_accuracy_score(y_true, y_pred):\n+ C = metrics.confusion_matrix(y_true, y_pred)\n+ with np.errstate(divide=\'ignore\', invalid=\'ignore\'):\n+ per_class = np.diag(C) / C.sum(axis=1)\n+ if np.any(np.isnan(per_class)):\n+ warnings.warn(\'y_pred contains classes not in y_true\')\n+ per_class = per_class[~np.isnan(per_class)]\n+ score = np.mean(per_class)\n+ return score\n+\n+ if scoring_json[\'primary_scoring\'] == "default":\n+ return None\n+\n+ my_scorers = metrics.SCORERS\n+ if \'balanced_accuracy\' not in my_scorers:\n+ my_scorers[\'balanced_accuracy\'] = metrics.make_scorer(balanced_accuracy_score)\n+\n+ if scoring_json[\'secondary_scoring\'] != \'None\'\\\n+ and scoring_json[\'secondary_scoring\'] != scoring_json[\'primary_scoring\']:\n+ scoring = {}\n+ scoring[\'primary\'] = my_scorers[ scoring_json[\'primary_scoring\'] ]\n+ for scorer in scoring_json[\'secondary_scoring\'].split(\',\'):\n+ if scorer != scoring_json[\'primary_scoring\']:\n+ scoring[scorer] = my_scorers[scorer]\n+ return scoring\n+\n+ return my_scorers[ scoring_json[\'primary_scoring\'] ]\n+\n' |