Repository 'sklearn_numeric_clustering'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/sklearn_numeric_clustering

Changeset 8:85d31935d4a2 (2018-05-22)
Previous changeset 7:09cd12b741fb (2018-04-28) Next changeset 9:5a9fcfd4151f (2018-05-30)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 79fe42239dcf077b13f85cbcd6c6e30d7e1e4832
modified:
main_macros.xml
test-data/feature_selection_result01
test-data/feature_selection_result02
test-data/feature_selection_result03
test-data/feature_selection_result04
test-data/feature_selection_result05
test-data/feature_selection_result06
test-data/feature_selection_result07
test-data/feature_selection_result08
test-data/feature_selection_result09
test-data/feature_selection_result10
b
diff -r 09cd12b741fb -r 85d31935d4a2 main_macros.xml
--- a/main_macros.xml Sat Apr 28 18:08:17 2018 -0400
+++ b/main_macros.xml Tue May 22 19:31:34 2018 -0400
[
b'@@ -2,12 +2,17 @@\n   <token name="@VERSION@">0.9</token>\n \n   <token name="@COLUMNS_FUNCTION@">\n-def read_columns(f, c, **args):\n+def read_columns(f, c, return_df=False, **args):\n   data = pandas.read_csv(f, **args)\n   cols = c.split (\',\')\n   cols = map(int, cols)\n   cols = list(map(lambda x: x - 1, cols))\n-  y = data.iloc[:,cols].values\n+  data = data.iloc[:,cols]\n+  y = data.values\n+  if return_df:\n+    return y, data\n+  else:\n+    return y\n   return y\n   </token>\n \n@@ -789,6 +794,128 @@\n     </when>\n     <yield/>\n   </xml>\n+  <xml name="feature_selection_all">\n+    <conditional name="feature_selection_algorithms">\n+      <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">\n+        <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>\n+        <option value="GenericUnivariateSelect" selected="true">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>\n+        <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option>\n+        <option value="SelectKBest">SelectKBest - Select features according to the k highest scores</option>\n+        <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option>\n+        <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>\n+        <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>\n+        <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>\n+        <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>\n+        <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>\n+        <!--option value="chi2">Compute chi-squared stats between each non-negative feature and class</option-->\n+        <!--option value="f_classif">Compute the ANOVA F-value for the provided sample</option-->\n+        <!--option value="f_regression">Univariate linear regression tests</option-->\n+        <!--option value="mutual_info_classif">Estimate mutual information for a discrete target variable</option-->\n+        <!--option value="mutual_info_regression">Estimate mutual information for a continuous target variable</option-->\n+      </param>\n+      <when value="SelectFromModel">\n+        <expand macro="feature_selection_estimator" />\n+        <conditional name="extra_estimator">\n+          <expand macro="feature_selection_extra_estimator" >\n+            <option value="no_load">No, I will load a prefitted estimator</option>\n+          </expand>\n+          <expand macro="feature_selection_estimator_choices" >\n+            <when value="no_load">\n+              <param name="fitted_estimator" type="data" format=\'zip\' label="Load a prefitted estimator" />\n+            </when>\n+          </expand>\n+        </conditional>\n+        <section name="options" title="Other Options" expanded="True">\n+          <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. \'mean\', \'median\', \'1.25*mean\'." />\n+          <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />\n+        </section>\n+      </when>\n+      <when value="GenericUnivariateSelect">\n+        <expand macro="feature_selection_score_function" />\n+        <section name="options" title="Other Options" expanded="True">\n+          <param argument="mode" type="select" label="Feature selection mode">\n+            <option value="percent'..b'" label="Number of top features to select" help="No \'all\' option is supported." />\n+        </section>\n+      </when>\n+      <when value="SelectFpr">\n+        <expand macro="feature_selection_score_function" />\n+        <section name="options" title="Other Options" expanded="True">\n+          <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/>\n+        </section>\n+      </when>\n+      <when value="SelectFdr">\n+        <expand macro="feature_selection_score_function" />\n+        <section name="options" title="Other Options" expanded="True">\n+          <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>\n+        </section>\n+      </when>\n+      <when value="SelectFwe">\n+        <expand macro="feature_selection_score_function" />\n+        <section name="options" title="Other Options" expanded="True">\n+          <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>\n+        </section>\n+      </when>\n+      <when value="RFE">\n+        <expand macro="feature_selection_estimator" />\n+        <conditional name="extra_estimator">\n+          <expand macro="feature_selection_extra_estimator" />\n+          <expand macro="feature_selection_estimator_choices" />\n+        </conditional>\n+        <section name="options" title="Other Options" expanded="True">\n+          <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." />\n+          <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />\n+          <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />\n+        </section>\n+      </when>\n+      <when value="RFECV">\n+        <expand macro="feature_selection_estimator" />\n+        <conditional name="extra_estimator">\n+          <expand macro="feature_selection_extra_estimator" />\n+          <expand macro="feature_selection_estimator_choices" />\n+        </conditional>\n+        <section name="options" title="Other Options" expanded="True">\n+          <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />\n+          <param argument="cv" type="integer" value="" optional="true" label="cv" help="Determines the cross-validation splitting strategy" />\n+          <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A string (see model evaluation documentation) or a scorer callable object / function with signature scorer(estimator, X, y)."/>\n+          <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />\n+          <param argument="n_jobs" type="integer" value="1" label="n_jobs" help="Number of cores to run in parallel while fitting across folds. Defaults to 1 core."/>\n+        </section>\n+      </when>\n+      <when value="VarianceThreshold">\n+        <section name="options" title="Options" expanded="True">\n+          <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>\n+        </section>\n+      </when>\n+      <!--when value="chi2">\n+      </when>\n+      <when value="f_classif">\n+      </when>\n+      <when value="f_regression">\n+      </when>\n+      <when value="mutual_info_classif">\n+      </when>\n+      <when value="mutual_info_regression">\n+      </when-->\n+    </conditional>\n+  </xml>\n   <xml name="feature_selection_score_function">\n     <param argument="score_func" type="select" label="Select a score function">\n       <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>\n'
b
diff -r 09cd12b741fb -r 85d31935d4a2 test-data/feature_selection_result01
--- a/test-data/feature_selection_result01 Sat Apr 28 18:08:17 2018 -0400
+++ b/test-data/feature_selection_result01 Tue May 22 19:31:34 2018 -0400
b
@@ -1,4 +1,4 @@
-0 1
+temp_1 average
 69.0 69.7
 59.0 58.1
 88.0 77.3
b
diff -r 09cd12b741fb -r 85d31935d4a2 test-data/feature_selection_result02
--- a/test-data/feature_selection_result02 Sat Apr 28 18:08:17 2018 -0400
+++ b/test-data/feature_selection_result02 Tue May 22 19:31:34 2018 -0400
b
@@ -1,4 +1,4 @@
-0 1 2 3
+temp_2 temp_1 forecast_noaa friend
 68.0 69.0 65.0 88.0
 60.0 59.0 57.0 66.0
 85.0 88.0 75.0 70.0
b
diff -r 09cd12b741fb -r 85d31935d4a2 test-data/feature_selection_result03
--- a/test-data/feature_selection_result03 Sat Apr 28 18:08:17 2018 -0400
+++ b/test-data/feature_selection_result03 Tue May 22 19:31:34 2018 -0400
b
@@ -1,4 +1,4 @@
-0 1
+temp_1 friend
 69.0 88.0
 59.0 66.0
 88.0 70.0
b
diff -r 09cd12b741fb -r 85d31935d4a2 test-data/feature_selection_result04
--- a/test-data/feature_selection_result04 Sat Apr 28 18:08:17 2018 -0400
+++ b/test-data/feature_selection_result04 Tue May 22 19:31:34 2018 -0400
b
@@ -1,4 +1,4 @@
-0 1 2 3 4 5 6 7 8 9
+month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Mon
 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 1.0
 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0
 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0
b
diff -r 09cd12b741fb -r 85d31935d4a2 test-data/feature_selection_result05
--- a/test-data/feature_selection_result05 Sat Apr 28 18:08:17 2018 -0400
+++ b/test-data/feature_selection_result05 Tue May 22 19:31:34 2018 -0400
b
@@ -1,4 +1,4 @@
-0 1 2 3 4 5 6 7 8
+month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend
 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0
 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0
 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0
b
diff -r 09cd12b741fb -r 85d31935d4a2 test-data/feature_selection_result06
--- a/test-data/feature_selection_result06 Sat Apr 28 18:08:17 2018 -0400
+++ b/test-data/feature_selection_result06 Tue May 22 19:31:34 2018 -0400
b
@@ -1,4 +1,4 @@
-0 1 2 3 4 5 6 7 8
+month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend
 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0
 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0
 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0
b
diff -r 09cd12b741fb -r 85d31935d4a2 test-data/feature_selection_result07
--- a/test-data/feature_selection_result07 Sat Apr 28 18:08:17 2018 -0400
+++ b/test-data/feature_selection_result07 Tue May 22 19:31:34 2018 -0400
b
@@ -1,4 +1,4 @@
-0 1 2 3 4 5 6 7 8
+month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend
 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0
 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0
 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0
b
diff -r 09cd12b741fb -r 85d31935d4a2 test-data/feature_selection_result08
--- a/test-data/feature_selection_result08 Sat Apr 28 18:08:17 2018 -0400
+++ b/test-data/feature_selection_result08 Tue May 22 19:31:34 2018 -0400
b
@@ -1,4 +1,4 @@
-0 1 2 3 4 5 6 7
+day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend
 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0
 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0
 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0
b
diff -r 09cd12b741fb -r 85d31935d4a2 test-data/feature_selection_result09
--- a/test-data/feature_selection_result09 Sat Apr 28 18:08:17 2018 -0400
+++ b/test-data/feature_selection_result09 Tue May 22 19:31:34 2018 -0400
b
@@ -1,4 +1,4 @@
-0 1 2 3 4 5 6 7 8 9 10 11 12 13
+month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Fri week_Mon week_Sat week_Sun week_Tues
 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 0.0 1.0 0.0 0.0 0.0
 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 0.0 0.0 0.0 0.0
 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0 0.0 1.0 0.0 0.0
b
diff -r 09cd12b741fb -r 85d31935d4a2 test-data/feature_selection_result10
--- a/test-data/feature_selection_result10 Sat Apr 28 18:08:17 2018 -0400
+++ b/test-data/feature_selection_result10 Tue May 22 19:31:34 2018 -0400
b
@@ -1,4 +1,4 @@
-0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Fri week_Mon week_Sat week_Sun week_Thurs week_Tues week_Wed
 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0
 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0
 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0