Galaxy |

Changeset 2:dd502cb0d567 (2018-05-30)

Previous changeset 1:02eadaaa4bf7 (2018-05-22) Next changeset 3:424d8d21744d (2018-06-05)

Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 4ed8c4f6ef9ece81797a398b17a99bbaf49a6978

modified:
main_macros.xml
model_validation.xml

added:
test-data/mv_result07.tabular

diff -r 02eadaaa4bf7 -r dd502cb0d567 main_macros.xml
--- a/main_macros.xml Tue May 22 19:33:14 2018 -0400
+++ b/main_macros.xml Wed May 30 08:27:01 2018 -0400

[

@@ -16,6 +16,47 @@
   return y
   </token>

+## Generate an instance of one of the sklearn.feature_selection classes.
+## must call "@COLUMNS_FUNCTION@"
+  <token name="@FEATURE_SELECTOR_FUNCTION@">
+def feature_selector(inputs):
+  selector = inputs["selected_algorithm"]
+  selector = getattr(sklearn.feature_selection, selector)
+  options = inputs["options"]
+
+  if inputs['selected_algorithm'] == 'SelectFromModel':
+    if not options['threshold'] or options['threshold'] == 'None':
+      options['threshold'] = None
+      if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load':
+        fitted_estimator = pickle.load(open("inputs['extra_estimator']['fitted_estimator']", 'r'))
+        new_selector = selector(fitted_estimator, prefit=True, **options)
+      else:
+        estimator=inputs["estimator"]
+        if inputs["extra_estimator"]["has_estimator"]=='no':
+          estimator=inputs["extra_estimator"]["new_estimator"]
+        estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
+        new_selector = selector(estimator, **options)
+
+  elif inputs['selected_algorithm'] in ['RFE', 'RFECV']:
+    if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'):
+      options['scoring'] = None
+    estimator=inputs["estimator"]
+    if inputs["extra_estimator"]["has_estimator"]=='no':
+      estimator=inputs["extra_estimator"]["new_estimator"]
+    estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
+    new_selector = selector(estimator, **options)
+
+  elif inputs['selected_algorithm'] == "VarianceThreshold":
+    new_selector = selector(**options)
+
+  else:
+    score_func = inputs["score_func"]
+    score_func = getattr(sklearn.feature_selection, score_func)
+    new_selector = selector(score_func, **options)
+
+  return new_selector
+  </token>
+
   <xml name="python_requirements">
       <requirements>
           <requirement type="package" version="2.7">python</requirement>
@@ -794,6 +835,13 @@
     </when>
     <yield/>
   </xml>
+  <xml name="estimator_input_no_fit">
+    <expand macro="feature_selection_estimator" />
+    <conditional name="extra_estimator">
+      <expand macro="feature_selection_extra_estimator" />
+      <expand macro="feature_selection_estimator_choices" />
+    </conditional>
+  </xml>
   <xml name="feature_selection_all">
     <conditional name="feature_selection_algorithms">
       <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
@@ -975,8 +1023,8 @@
     <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A metric used to evaluate the estimator"/>
   </xml>

-  <xml name="pre_dispatch">
-    <param argument="pre_dispatch" type="text" value="all" optional="true" label="pre_dispatch" help="Number of predispatched jobs for parallel execution"/>
+  <xml name="pre_dispatch" token_type="text" token_default_value="all" token_help="Number of predispatched jobs for parallel execution">
+    <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/>
   </xml>

diff -r 02eadaaa4bf7 -r dd502cb0d567 model_validation.xml
--- a/model_validation.xml Tue May 22 19:33:14 2018 -0400
+++ b/model_validation.xml Wed May 30 08:27:01 2018 -0400

[

b'@@ -18,13 +18,17 @@\n import sys\n import json\n import pandas\n+import ast\n import pickle\n import numpy as np\n import sklearn.model_selection\n from sklearn import svm, linear_model, ensemble\n+from sklearn.pipeline import Pipeline\n \n @COLUMNS_FUNCTION@\n \n+@FEATURE_SELECTOR_FUNCTION@\n+\n input_json_path = sys.argv[1]\n params = json.load(open(input_json_path, "r"))\n \n@@ -51,50 +55,90 @@\n )\n y=y.ravel()\n \n-validator = params["model_validation_functions"]["selected_function"]\n-validator = getattr(sklearn.model_selection, validator)\n options = params["model_validation_functions"]["options"]\n if \'scoring\' in options and options[\'scoring\'] == \'\':\n options[\'scoring\'] = None\n+if \'pre_dispatch\' in options and options[\'pre_dispatch\'] == \'\':\n+ options[\'pre_dispatch\'] = None\n \n+pipeline_steps = []\n+\n+## Set up feature selector and add to pipeline steps.\n+if params[\'feature_selection\'][\'do_feature_selection\'] == \'Yes\':\n+ feature_selector = feature_selector(params[\'feature_selection\'][\'feature_selection_algorithms\'])\n+ pipeline_steps.append( (\'feature_selector\', feature_selector))\n+\n+## Set up estimator and add to pipeline.\n estimator=params["model_validation_functions"]["estimator"]\n if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == \'no\':\n estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"]\n estimator = eval(estimator.replace(\'__dq__\', \'"\').replace("__sq__","\'"))\n \n-#if $model_validation_functions.selected_function == \'cross_validate\':\n-res = validator(estimator, X, y, **options)\n-rval = res["$model_validation_functions.return_type"]\n+pipeline_steps.append( (\'estimator\', estimator) )\n+\n+pipeline = Pipeline(pipeline_steps)\n+\n+## Set up validator, run pipeline through validator and return results.\n \n-#elif $model_validation_functions.selected_function == \'learning_curve\':\n-options[\'train_sizes\'] = 
eval(options[\'train_sizes\'])\n-train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options)\n-rval = eval("$model_validation_functions.return_type")\n+validator = params["model_validation_functions"]["selected_function"]\n+validator = getattr(sklearn.model_selection, validator)\n+\n+selected_function = params["model_validation_functions"]["selected_function"]\n+rval_type = params["model_validation_functions"].get("return_type", None)\n \n-#elif $model_validation_functions.selected_function == \'permutation_test_score\':\n-score, permutation_scores, pvalue = validator(estimator, X, y, **options)\n-rval = eval("$model_validation_functions.return_type")\n-if "$model_validation_functions.return_type" in ["score", "pvalue"]:\n- rval = [rval]\n-\n-#elif $model_validation_functions.selected_function == \'validation_curve\':\n-options[\'param_range\'] = eval(options[\'param_range\'])\n-train_scores, test_scores = validator(estimator, X, y, **options)\n-rval = eval("$model_validation_functions.return_type")\n-\n-#else:\n-rval = validator(estimator, X, y, **options)\n-#end if\n+if selected_function == \'cross_validate\':\n+ res = validator(pipeline, X, y, **options)\n+ rval = res[rval_type]\n+elif selected_function == \'learning_curve\':\n+ options[\'train_sizes\'] = eval(options[\'train_sizes\'])\n+ train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options)\n+ rval = eval(rval_type)\n+elif selected_function == \'permutation_test_score\':\n+ score, permutation_scores, pvalue = validator(pipeline, X, y, **options)\n+ rval = eval(rval_type)\n+ if rval_type in ["score", "pvalue"]:\n+ rval = [rval]\n+elif selected_function == \'validation_curve\':\n+ options[\'param_name\'] = \'estimator__\' + options[\'param_name\']\n+ options[\'param_range\'] = eval(options[\'param_range\'])\n+ train_scores, test_scores = validator(pipeline, X, y, **options)\n+ rval = eval(rval_type)\n+elif selected_function == \'GridSearchCV\':\n+ param_grid = 
params["model_validation_functions"]["param_grid"].replace("__sq__","\'")\\\n+ .replace(\'__dq__\',\'"\').replace("__oc__", "{").replace("__c'..b're">\n- <expand macro="feature_selection_estimator" />\n- <conditional name="extra_estimator">\n- <expand macro="feature_selection_extra_estimator" />\n- <expand macro="feature_selection_estimator_choices" />\n- </conditional>\n+ <expand macro="estimator_input_no_fit" />\n <section name="options" title="Other Options" expanded="false">\n \n <expand macro="model_validation_common_options"/>\n@@ -156,11 +206,7 @@\n </section>\n </when>\n <when value="learning_curve">\n- <expand macro="feature_selection_estimator" />\n- <conditional name="extra_estimator">\n- <expand macro="feature_selection_extra_estimator" />\n- <expand macro="feature_selection_estimator_choices" />\n- </conditional>\n+ <expand macro="estimator_input_no_fit" />\n <section name="options" title="Other Options" expanded="false">\n \n <expand macro="model_validation_common_options"/>\n@@ -178,11 +224,7 @@\n </param>\n </when>\n <when value="permutation_test_score">\n- <expand macro="feature_selection_estimator" />\n- <conditional name="extra_estimator">\n- <expand macro="feature_selection_extra_estimator" />\n- <expand macro="feature_selection_estimator_choices" />\n- </conditional>\n+ <expand macro="estimator_input_no_fit" />\n <section name="options" title="Other Options" expanded="false">\n \n <expand macro="model_validation_common_options"/>\n@@ -197,11 +239,7 @@\n </param>\n </when>\n <when value="validation_curve">\n- <expand macro="feature_selection_estimator" />\n- <conditional name="extra_estimator">\n- <expand macro="feature_selection_extra_estimator" />\n- <expand macro="feature_selection_estimator_choices" />\n- </conditional>\n+ <expand macro="estimator_input_no_fit" />\n <section name="options" title="Other Options" expanded="false">\n <param name="param_name" type="text" value="gamma" label="param_name" help="Name of the parameter that will be 
varied"/>\n <param name="param_range" type="text" value="np.logspace(-6, -1, 5)" label="param_range" help="The values of the parameter that will be evaluated."/>\n@@ -287,6 +325,23 @@\n <param name="return_type" value="test_scores"/>\n <output name="outfile" file="mv_result06.tabular"/>\n </test>\n+ <test>\n+ <param name="do_feature_selection" value="Yes"/>\n+ <param name="selected_algorithm" value="SelectKBest"/>\n+ <param name="score_func" value="chi2"/>\n+ <param name="selected_function" value="GridSearchCV"/>\n+ <param name="estimator" value="svm.SVR(kernel="linear")"/>\n+ <param name="has_estimator" value="yes"/>\n+ <param name="param_grid" value="[{\'feature_selector__k\': [3, 7], \'estimator__C\': [1, 100]}]"/>\n+ <param name="return_type" value="best_score_"/>\n+ <param name="infile1" value="regression_X.tabular" ftype="tabular"/>\n+ <param name="header1" value="true" />\n+ <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>\n+ <param name="infile2" value="regression_y.tabular" ftype="tabular"/>\n+ <param name="header2" value="true" />\n+ <param name="col2" value="1"/>\n+ <output name="outfile" file="mv_result07.tabular"/>\n+ </test>\n </tests>\n <help>\n <![CDATA[\n'

diff -r 02eadaaa4bf7 -r dd502cb0d567 test-data/mv_result07.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mv_result07.tabular Wed May 30 08:27:01 2018 -0400

@@ -0,0 +1,1 @@
+0.7824428015300172