Previous changeset 1:02eadaaa4bf7 (2018-05-22) Next changeset 3:424d8d21744d (2018-06-05) |
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 4ed8c4f6ef9ece81797a398b17a99bbaf49a6978 |
modified:
main_macros.xml model_validation.xml |
added:
test-data/mv_result07.tabular |
b |
diff -r 02eadaaa4bf7 -r dd502cb0d567 main_macros.xml --- a/main_macros.xml Tue May 22 19:33:14 2018 -0400 +++ b/main_macros.xml Wed May 30 08:27:01 2018 -0400 |
[ |
@@ -16,6 +16,64 @@
   return y
   </token>
 
+## generate an instance for one of sklearn.feature_selection classes
+## must call "@COLUMNS_FUNCTION@"
+  <token name="@FEATURE_SELECTOR_FUNCTION@">
+def feature_selector(inputs):
+  """Return an instance of the sklearn.feature_selection class named in inputs.
+
+  inputs is a dict of tool parameters. Keys read here:
+    "selected_algorithm" -- class name looked up on sklearn.feature_selection
+    "options" -- keyword arguments forwarded to the class constructor
+    "estimator", "extra_estimator" -- where the wrapped estimator comes from
+      (SelectFromModel, RFE and RFECV only)
+    "score_func" -- name of a sklearn.feature_selection scoring function
+      (every algorithm not listed above, except VarianceThreshold)
+
+  An empty or 'None' text value for options["threshold"] (SelectFromModel) or
+  options["scoring"] (RFE, RFECV) is replaced by None, in place.
+  """
+  algorithm = inputs["selected_algorithm"]
+  selector = getattr(sklearn.feature_selection, algorithm)
+  options = inputs["options"]
+
+  if algorithm == 'SelectFromModel':
+    if not options['threshold'] or options['threshold'] == 'None':
+      options['threshold'] = None
+    if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load':
+      ## 'fitted_estimator' is used as the path of a pickled, already fitted model.
+      ## NOTE(review): unpickling runs arbitrary code; only load trusted files.
+      with open(inputs['extra_estimator']['fitted_estimator'], 'rb') as model_handler:
+        fitted_estimator = pickle.load(model_handler)
+      new_selector = selector(fitted_estimator, prefit=True, **options)
+    else:
+      estimator=inputs["estimator"]
+      if inputs["extra_estimator"]["has_estimator"]=='no':
+        estimator=inputs["extra_estimator"]["new_estimator"]
+      ## NOTE(review): eval() of text taken from the tool parameters; __dq__/__sq__ are turned back into quote characters first.
+      estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
+      new_selector = selector(estimator, **options)
+
+  elif algorithm in ['RFE', 'RFECV']:
+    if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'):
+      options['scoring'] = None
+    estimator=inputs["estimator"]
+    if inputs["extra_estimator"]["has_estimator"]=='no':
+      estimator=inputs["extra_estimator"]["new_estimator"]
+    estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
+    new_selector = selector(estimator, **options)
+
+  elif algorithm == "VarianceThreshold":
+    new_selector = selector(**options)
+
+  else:
+    score_func = inputs["score_func"]
+    score_func = getattr(sklearn.feature_selection, score_func)
+    new_selector = selector(score_func, **options)
+
+  return new_selector
+  </token>
+
   <xml name="python_requirements">
     <requirements>
       <requirement type="package" version="2.7">python</requirement>
@@ -794,6 +852,13 @@
     </when>
     <yield/>
   </xml>
+  <xml name="estimator_input_no_fit">
+    <expand macro="feature_selection_estimator" />
+    <conditional name="extra_estimator">
+      <expand macro="feature_selection_extra_estimator" />
+      <expand macro="feature_selection_estimator_choices" />
+    </conditional>
+  </xml>
   <xml name="feature_selection_all">
     <conditional name="feature_selection_algorithms">
       <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
@@ -975,8 +1040,8 @@
     <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A metric used to evaluate the estimator"/>
   </xml>
 
-  <xml name="pre_dispatch">
-    <param argument="pre_dispatch" type="text" value="all" optional="true" label="pre_dispatch" help="Number of predispatched jobs for parallel execution"/>
+  <xml name="pre_dispatch" token_type="text" token_default_value="all" token_help="Number of predispatched jobs for parallel execution">
+    <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/>
   </xml>
 
   <!-- Outputs --> |
b |
diff -r 02eadaaa4bf7 -r dd502cb0d567 model_validation.xml --- a/model_validation.xml Tue May 22 19:33:14 2018 -0400 +++ b/model_validation.xml Wed May 30 08:27:01 2018 -0400 |
[ |
b'@@ -18,13 +18,17 @@\n import sys\n import json\n import pandas\n+import ast\n import pickle\n import numpy as np\n import sklearn.model_selection\n from sklearn import svm, linear_model, ensemble\n+from sklearn.pipeline import Pipeline\n \n @COLUMNS_FUNCTION@\n \n+@FEATURE_SELECTOR_FUNCTION@\n+\n input_json_path = sys.argv[1]\n params = json.load(open(input_json_path, "r"))\n \n@@ -51,50 +55,90 @@\n )\n y=y.ravel()\n \n-validator = params["model_validation_functions"]["selected_function"]\n-validator = getattr(sklearn.model_selection, validator)\n options = params["model_validation_functions"]["options"]\n if \'scoring\' in options and options[\'scoring\'] == \'\':\n options[\'scoring\'] = None\n+if \'pre_dispatch\' in options and options[\'pre_dispatch\'] == \'\':\n+ options[\'pre_dispatch\'] = None\n \n+pipeline_steps = []\n+\n+## Set up feature selector and add to pipeline steps.\n+if params[\'feature_selection\'][\'do_feature_selection\'] == \'Yes\':\n+ feature_selector = feature_selector(params[\'feature_selection\'][\'feature_selection_algorithms\'])\n+ pipeline_steps.append( (\'feature_selector\', feature_selector))\n+\n+## Set up estimator and add to pipeline.\n estimator=params["model_validation_functions"]["estimator"]\n if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == \'no\':\n estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"]\n estimator = eval(estimator.replace(\'__dq__\', \'"\').replace("__sq__","\'"))\n \n-#if $model_validation_functions.selected_function == \'cross_validate\':\n-res = validator(estimator, X, y, **options)\n-rval = res["$model_validation_functions.return_type"]\n+pipeline_steps.append( (\'estimator\', estimator) )\n+\n+pipeline = Pipeline(pipeline_steps)\n+\n+## Set up validator, run pipeline through validator and return results.\n \n-#elif $model_validation_functions.selected_function == \'learning_curve\':\n-options[\'train_sizes\'] = 
eval(options[\'train_sizes\'])\n-train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options)\n-rval = eval("$model_validation_functions.return_type")\n+validator = params["model_validation_functions"]["selected_function"]\n+validator = getattr(sklearn.model_selection, validator)\n+\n+selected_function = params["model_validation_functions"]["selected_function"]\n+rval_type = params["model_validation_functions"].get("return_type", None)\n \n-#elif $model_validation_functions.selected_function == \'permutation_test_score\':\n-score, permutation_scores, pvalue = validator(estimator, X, y, **options)\n-rval = eval("$model_validation_functions.return_type")\n-if "$model_validation_functions.return_type" in ["score", "pvalue"]:\n- rval = [rval]\n-\n-#elif $model_validation_functions.selected_function == \'validation_curve\':\n-options[\'param_range\'] = eval(options[\'param_range\'])\n-train_scores, test_scores = validator(estimator, X, y, **options)\n-rval = eval("$model_validation_functions.return_type")\n-\n-#else:\n-rval = validator(estimator, X, y, **options)\n-#end if\n+if selected_function == \'cross_validate\':\n+ res = validator(pipeline, X, y, **options)\n+ rval = res[rval_type]\n+elif selected_function == \'learning_curve\':\n+ options[\'train_sizes\'] = eval(options[\'train_sizes\'])\n+ train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options)\n+ rval = eval(rval_type)\n+elif selected_function == \'permutation_test_score\':\n+ score, permutation_scores, pvalue = validator(pipeline, X, y, **options)\n+ rval = eval(rval_type)\n+ if rval_type in ["score", "pvalue"]:\n+ rval = [rval]\n+elif selected_function == \'validation_curve\':\n+ options[\'param_name\'] = \'estimator__\' + options[\'param_name\']\n+ options[\'param_range\'] = eval(options[\'param_range\'])\n+ train_scores, test_scores = validator(pipeline, X, y, **options)\n+ rval = eval(rval_type)\n+elif selected_function == \'GridSearchCV\':\n+ param_grid = 
params["model_validation_functions"]["param_grid"].replace("__sq__","\'")\\\n+ .replace(\'__dq__\',\'"\').replace("__oc__", "{").replace("__c'..b're">\n- <expand macro="feature_selection_estimator" />\n- <conditional name="extra_estimator">\n- <expand macro="feature_selection_extra_estimator" />\n- <expand macro="feature_selection_estimator_choices" />\n- </conditional>\n+ <expand macro="estimator_input_no_fit" />\n <section name="options" title="Other Options" expanded="false">\n <!--groups-->\n <expand macro="model_validation_common_options"/>\n@@ -156,11 +206,7 @@\n </section>\n </when>\n <when value="learning_curve">\n- <expand macro="feature_selection_estimator" />\n- <conditional name="extra_estimator">\n- <expand macro="feature_selection_extra_estimator" />\n- <expand macro="feature_selection_estimator_choices" />\n- </conditional>\n+ <expand macro="estimator_input_no_fit" />\n <section name="options" title="Other Options" expanded="false">\n <!--groups-->\n <expand macro="model_validation_common_options"/>\n@@ -178,11 +224,7 @@\n </param>\n </when>\n <when value="permutation_test_score">\n- <expand macro="feature_selection_estimator" />\n- <conditional name="extra_estimator">\n- <expand macro="feature_selection_extra_estimator" />\n- <expand macro="feature_selection_estimator_choices" />\n- </conditional>\n+ <expand macro="estimator_input_no_fit" />\n <section name="options" title="Other Options" expanded="false">\n <!--groups-->\n <expand macro="model_validation_common_options"/>\n@@ -197,11 +239,7 @@\n </param>\n </when>\n <when value="validation_curve">\n- <expand macro="feature_selection_estimator" />\n- <conditional name="extra_estimator">\n- <expand macro="feature_selection_extra_estimator" />\n- <expand macro="feature_selection_estimator_choices" />\n- </conditional>\n+ <expand macro="estimator_input_no_fit" />\n <section name="options" title="Other Options" expanded="false">\n <param name="param_name" type="text" value="gamma" label="param_name" 
help="Name of the parameter that will be varied"/>\n <param name="param_range" type="text" value="np.logspace(-6, -1, 5)" label="param_range" help="The values of the parameter that will be evaluated."/>\n@@ -287,6 +325,23 @@\n <param name="return_type" value="test_scores"/>\n <output name="outfile" file="mv_result06.tabular"/>\n </test>\n+ <test>\n+ <param name="do_feature_selection" value="Yes"/>\n+ <param name="selected_algorithm" value="SelectKBest"/>\n+ <param name="score_func" value="chi2"/>\n+ <param name="selected_function" value="GridSearchCV"/>\n+ <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/>\n+ <param name="has_estimator" value="yes"/>\n+ <param name="param_grid" value="[{\'feature_selector__k\': [3, 7], \'estimator__C\': [1, 100]}]"/>\n+ <param name="return_type" value="best_score_"/>\n+ <param name="infile1" value="regression_X.tabular" ftype="tabular"/>\n+ <param name="header1" value="true" />\n+ <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>\n+ <param name="infile2" value="regression_y.tabular" ftype="tabular"/>\n+ <param name="header2" value="true" />\n+ <param name="col2" value="1"/>\n+ <output name="outfile" file="mv_result07.tabular"/>\n+ </test>\n </tests>\n <help>\n <![CDATA[\n' |
b |
diff -r 02eadaaa4bf7 -r dd502cb0d567 test-data/mv_result07.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mv_result07.tabular Wed May 30 08:27:01 2018 -0400 |
b |
@@ -0,0 +1,1 @@ +0.7824428015300172 |