changeset 8:b1c2fe7df3f3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 79fe42239dcf077b13f85cbcd6c6e30d7e1e4832
author bgruening
date Tue, 22 May 2018 19:32:49 -0400
parents cdb7948427aa
children 1b2b2d304e44
files main_macros.xml test-data/feature_selection_result01 test-data/feature_selection_result02 test-data/feature_selection_result03 test-data/feature_selection_result04 test-data/feature_selection_result05 test-data/feature_selection_result06 test-data/feature_selection_result07 test-data/feature_selection_result08 test-data/feature_selection_result09 test-data/feature_selection_result10
diffstat 11 files changed, 139 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/main_macros.xml	Sat Apr 28 18:09:56 2018 -0400
+++ b/main_macros.xml	Tue May 22 19:32:49 2018 -0400
@@ -2,12 +2,17 @@
   <token name="@VERSION@">0.9</token>
 
   <token name="@COLUMNS_FUNCTION@">
-def read_columns(f, c, **args):
+def read_columns(f, c, return_df=False, **args):
+  """Read f with pandas.read_csv(f, **args) and return the values of the columns listed in c
+  (comma-separated, 1-based). If return_df is true, return (values, DataFrame) instead."""
   data = pandas.read_csv(f, **args)
   cols = c.split (',')
   cols = map(int, cols)
   cols = list(map(lambda x: x - 1, cols))
-  y = data.iloc[:,cols].values
+  data = data.iloc[:,cols]
+  y = data.values
+  if return_df:
+    return y, data
   return y
   </token>
 
@@ -789,6 +794,128 @@
     </when>
     <yield/>
   </xml>
+  <xml name="feature_selection_all">
+    <conditional name="feature_selection_algorithms"> <!-- one "when" branch per selectable algorithm; each branch holds that algorithm's parameters -->
+      <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
+        <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> <!-- default: the only option marked selected -->
+        <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
+        <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option>
+        <option value="SelectKBest">SelectKBest - Select features according to the k highest scores</option>
+        <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option>
+        <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>
+        <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>
+        <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
+        <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
+        <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>
+        <!--option value="chi2">Compute chi-squared stats between each non-negative feature and class</option-->
+        <!--option value="f_classif">Compute the ANOVA F-value for the provided sample</option-->
+        <!--option value="f_regression">Univariate linear regression tests</option-->
+        <!--option value="mutual_info_classif">Estimate mutual information for a discrete target variable</option-->
+        <!--option value="mutual_info_regression">Estimate mutual information for a continuous target variable</option-->
+      </param>
+      <when value="SelectFromModel">
+        <expand macro="feature_selection_estimator" />
+        <conditional name="extra_estimator">
+          <expand macro="feature_selection_extra_estimator">
+            <option value="no_load">No, I will load a prefitted estimator</option>
+          </expand>
+          <expand macro="feature_selection_estimator_choices">
+            <when value="no_load">
+              <param name="fitted_estimator" type="data" format="zip" label="Load a prefitted estimator" />
+            </when>
+          </expand>
+        </conditional>
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." />
+          <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />
+        </section>
+      </when>
+      <when value="GenericUnivariateSelect">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="mode" type="select" label="Feature selection mode">
+            <option value="percentile">percentile</option>
+            <option value="k_best">k_best</option>
+            <option value="fpr">fpr</option>
+            <option value="fdr">fdr</option>
+            <option value="fwe">fwe</option>
+          </param>
+          <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" />
+        </section>
+      </when>
+      <when value="SelectPercentile">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="percentile" type="integer" value="10" optional="true" label="Percent of features to keep" />
+        </section>
+      </when>
+      <when value="SelectKBest">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="k" type="integer" value="10" optional="true" label="Number of top features to select" help="No 'all' option is supported." />
+        </section>
+      </when>
+      <when value="SelectFpr">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="alpha" type="float" value="" optional="true" label="Alpha" help="The highest p-value for features to be kept."/>
+        </section>
+      </when>
+      <when value="SelectFdr">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="alpha" type="float" value="" optional="true" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
+        </section>
+      </when>
+      <when value="SelectFwe">
+        <expand macro="feature_selection_score_function" />
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="alpha" type="float" value="" optional="true" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
+        </section>
+      </when>
+      <when value="RFE">
+        <expand macro="feature_selection_estimator" />
+        <conditional name="extra_estimator">
+          <expand macro="feature_selection_extra_estimator" />
+          <expand macro="feature_selection_estimator_choices" />
+        </conditional>
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." />
+          <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
+          <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+        </section>
+      </when>
+      <when value="RFECV">
+        <expand macro="feature_selection_estimator" />
+        <conditional name="extra_estimator">
+          <expand macro="feature_selection_extra_estimator" />
+          <expand macro="feature_selection_estimator_choices" />
+        </conditional>
+        <section name="options" title="Other Options" expanded="true">
+          <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
+          <param argument="cv" type="integer" value="" optional="true" label="cv" help="Determines the cross-validation splitting strategy" />
+          <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A string (see model evaluation documentation) or a scorer callable object / function with signature scorer(estimator, X, y)."/>
+          <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+          <param argument="n_jobs" type="integer" value="1" label="n_jobs" help="Number of cores to run in parallel while fitting across folds. Defaults to 1 core."/>
+        </section>
+      </when>
+      <when value="VarianceThreshold">
+        <section name="options" title="Options" expanded="true">
+          <param argument="threshold" type="float" value="" optional="true" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
+        </section>
+      </when>
+      <!--when value="chi2">
+      </when>
+      <when value="f_classif">
+      </when>
+      <when value="f_regression">
+      </when>
+      <when value="mutual_info_classif">
+      </when>
+      <when value="mutual_info_regression">
+      </when-->
+    </conditional>
+  </xml>
   <xml name="feature_selection_score_function">
     <param argument="score_func" type="select" label="Select a score function">
       <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>
--- a/test-data/feature_selection_result01	Sat Apr 28 18:09:56 2018 -0400
+++ b/test-data/feature_selection_result01	Tue May 22 19:32:49 2018 -0400
@@ -1,4 +1,4 @@
-0	1
+temp_1	average
 69.0	69.7
 59.0	58.1
 88.0	77.3
--- a/test-data/feature_selection_result02	Sat Apr 28 18:09:56 2018 -0400
+++ b/test-data/feature_selection_result02	Tue May 22 19:32:49 2018 -0400
@@ -1,4 +1,4 @@
-0	1	2	3
+temp_2	temp_1	forecast_noaa	friend
 68.0	69.0	65.0	88.0
 60.0	59.0	57.0	66.0
 85.0	88.0	75.0	70.0
--- a/test-data/feature_selection_result03	Sat Apr 28 18:09:56 2018 -0400
+++ b/test-data/feature_selection_result03	Tue May 22 19:32:49 2018 -0400
@@ -1,4 +1,4 @@
-0	1
+temp_1	friend
 69.0	88.0
 59.0	66.0
 88.0	70.0
--- a/test-data/feature_selection_result04	Sat Apr 28 18:09:56 2018 -0400
+++ b/test-data/feature_selection_result04	Tue May 22 19:32:49 2018 -0400
@@ -1,4 +1,4 @@
-0	1	2	3	4	5	6	7	8	9
+month	day	temp_2	temp_1	average	forecast_noaa	forecast_acc	forecast_under	friend	week_Mon
 9.0	19.0	68.0	69.0	69.7	65.0	74.0	71.0	88.0	1.0
 4.0	14.0	60.0	59.0	58.1	57.0	63.0	58.0	66.0	0.0
 7.0	30.0	85.0	88.0	77.3	75.0	79.0	77.0	70.0	0.0
--- a/test-data/feature_selection_result05	Sat Apr 28 18:09:56 2018 -0400
+++ b/test-data/feature_selection_result05	Tue May 22 19:32:49 2018 -0400
@@ -1,4 +1,4 @@
-0	1	2	3	4	5	6	7	8
+month	day	temp_2	temp_1	average	forecast_noaa	forecast_acc	forecast_under	friend
 9.0	19.0	68.0	69.0	69.7	65.0	74.0	71.0	88.0
 4.0	14.0	60.0	59.0	58.1	57.0	63.0	58.0	66.0
 7.0	30.0	85.0	88.0	77.3	75.0	79.0	77.0	70.0
--- a/test-data/feature_selection_result06	Sat Apr 28 18:09:56 2018 -0400
+++ b/test-data/feature_selection_result06	Tue May 22 19:32:49 2018 -0400
@@ -1,4 +1,4 @@
-0	1	2	3	4	5	6	7	8
+month	day	temp_2	temp_1	average	forecast_noaa	forecast_acc	forecast_under	friend
 9.0	19.0	68.0	69.0	69.7	65.0	74.0	71.0	88.0
 4.0	14.0	60.0	59.0	58.1	57.0	63.0	58.0	66.0
 7.0	30.0	85.0	88.0	77.3	75.0	79.0	77.0	70.0
--- a/test-data/feature_selection_result07	Sat Apr 28 18:09:56 2018 -0400
+++ b/test-data/feature_selection_result07	Tue May 22 19:32:49 2018 -0400
@@ -1,4 +1,4 @@
-0	1	2	3	4	5	6	7	8
+month	day	temp_2	temp_1	average	forecast_noaa	forecast_acc	forecast_under	friend
 9.0	19.0	68.0	69.0	69.7	65.0	74.0	71.0	88.0
 4.0	14.0	60.0	59.0	58.1	57.0	63.0	58.0	66.0
 7.0	30.0	85.0	88.0	77.3	75.0	79.0	77.0	70.0
--- a/test-data/feature_selection_result08	Sat Apr 28 18:09:56 2018 -0400
+++ b/test-data/feature_selection_result08	Tue May 22 19:32:49 2018 -0400
@@ -1,4 +1,4 @@
-0	1	2	3	4	5	6	7
+day	temp_2	temp_1	average	forecast_noaa	forecast_acc	forecast_under	friend
 19.0	68.0	69.0	69.7	65.0	74.0	71.0	88.0
 14.0	60.0	59.0	58.1	57.0	63.0	58.0	66.0
 30.0	85.0	88.0	77.3	75.0	79.0	77.0	70.0
--- a/test-data/feature_selection_result09	Sat Apr 28 18:09:56 2018 -0400
+++ b/test-data/feature_selection_result09	Tue May 22 19:32:49 2018 -0400
@@ -1,4 +1,4 @@
-0	1	2	3	4	5	6	7	8	9	10	11	12	13
+month	day	temp_2	temp_1	average	forecast_noaa	forecast_acc	forecast_under	friend	week_Fri	week_Mon	week_Sat	week_Sun	week_Tues
 9.0	19.0	68.0	69.0	69.7	65.0	74.0	71.0	88.0	0.0	1.0	0.0	0.0	0.0
 4.0	14.0	60.0	59.0	58.1	57.0	63.0	58.0	66.0	0.0	0.0	0.0	0.0	0.0
 7.0	30.0	85.0	88.0	77.3	75.0	79.0	77.0	70.0	0.0	0.0	1.0	0.0	0.0
--- a/test-data/feature_selection_result10	Sat Apr 28 18:09:56 2018 -0400
+++ b/test-data/feature_selection_result10	Tue May 22 19:32:49 2018 -0400
@@ -1,4 +1,4 @@
-0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15
+month	day	temp_2	temp_1	average	forecast_noaa	forecast_acc	forecast_under	friend	week_Fri	week_Mon	week_Sat	week_Sun	week_Thurs	week_Tues	week_Wed
 9.0	19.0	68.0	69.0	69.7	65.0	74.0	71.0	88.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0
 4.0	14.0	60.0	59.0	58.1	57.0	63.0	58.0	66.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0
 7.0	30.0	85.0	88.0	77.3	75.0	79.0	77.0	70.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0