Repository 'sklearn_data_preprocess'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_data_preprocess

Changeset 9:595ecc6adb2c (2018-05-30)
Previous changeset 8:4c7ec23f6cac (2018-05-22) Next changeset 10:059417d4e10d (2018-06-05)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 4ed8c4f6ef9ece81797a398b17a99bbaf49a6978
modified:
main_macros.xml
added:
test-data/mv_result07.tabular
b
diff -r 4c7ec23f6cac -r 595ecc6adb2c main_macros.xml
--- a/main_macros.xml Tue May 22 19:32:12 2018 -0400
+++ b/main_macros.xml Wed May 30 08:26:01 2018 -0400
[
@@ -16,6 +16,47 @@
   return y
   </token>
 
+## generate an instance of one of the sklearn.feature_selection classes
+## must call "@COLUMNS_FUNCTION@"
+  <token name="@FEATURE_SELECTOR_FUNCTION@">
+def feature_selector(inputs):
+  """Return a sklearn.feature_selection instance configured from the inputs dict.
+
+  Reads "selected_algorithm", "options" and, per branch, "estimator", "extra_estimator"
+  or "score_func". NOTE(review): eval and pickle.load run user-supplied content.
+  """
+  algorithm = inputs["selected_algorithm"]
+  selector = getattr(sklearn.feature_selection, algorithm)
+  options = inputs["options"]
+  if algorithm == 'SelectFromModel':
+    if not options['threshold'] or options['threshold'] == 'None':
+      options['threshold'] = None
+    ## the estimator is resolved for every threshold value, so this is not nested above
+    if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load':
+      with open(inputs['extra_estimator']['fitted_estimator'], 'rb') as handle:
+        fitted_estimator = pickle.load(handle)
+      new_selector = selector(fitted_estimator, prefit=True, **options)
+    else:
+      estimator = inputs["estimator"]
+      if inputs["extra_estimator"]["has_estimator"] == 'no':
+        estimator = inputs["extra_estimator"]["new_estimator"]
+      estimator = eval(estimator.replace('__dq__', '"').replace("__sq__", "'"))
+      new_selector = selector(estimator, **options)
+  elif algorithm in ['RFE', 'RFECV']:
+    if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'):
+      options['scoring'] = None
+    estimator = inputs["estimator"]
+    if inputs["extra_estimator"]["has_estimator"] == 'no':
+      estimator = inputs["extra_estimator"]["new_estimator"]
+    estimator = eval(estimator.replace('__dq__', '"').replace("__sq__", "'"))
+    new_selector = selector(estimator, **options)
+  elif algorithm == "VarianceThreshold":
+    new_selector = selector(**options)
+  else:
+    new_selector = selector(getattr(sklearn.feature_selection, inputs["score_func"]), **options)
+  return new_selector
+  </token>
+
   <xml name="python_requirements">
       <requirements>
           <requirement type="package" version="2.7">python</requirement>
@@ -794,6 +835,13 @@
     </when>
     <yield/>
   </xml>
+  <xml name="estimator_input_no_fit"> <!-- expands feature_selection_estimator, then an "extra_estimator" conditional built from two further macros -->
+    <expand macro="feature_selection_estimator" />
+    <conditional name="extra_estimator">
+      <expand macro="feature_selection_extra_estimator" />
+      <expand macro="feature_selection_estimator_choices" />
+    </conditional>
+  </xml>
   <xml name="feature_selection_all">
     <conditional name="feature_selection_algorithms">
       <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
@@ -975,8 +1023,8 @@
     <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A metric used to evaluate the estimator"/>
   </xml>
 
-  <xml name="pre_dispatch">
-    <param argument="pre_dispatch" type="text" value="all" optional="true" label="pre_dispatch" help="Number of predispatched jobs for parallel execution"/>
+  <xml name="pre_dispatch" token_type="text" token_default_value="all" token_help="Number of predispatched jobs for parallel execution"> <!-- token_* attributes give the defaults for @TYPE@, @DEFAULT_VALUE@ and @HELP@ used below -->
+    <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/>
   </xml>
 
   <!-- Outputs -->
b
diff -r 4c7ec23f6cac -r 595ecc6adb2c test-data/mv_result07.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mv_result07.tabular Wed May 30 08:26:01 2018 -0400
b
@@ -0,0 +1,1 @@
+0.7824428015300172