diff feature_selection.xml @ 10:96f9b73327f2 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 76583c1fcd9d06a4679cc46ffaee44117b9e22cd
author bgruening
date Sat, 04 Aug 2018 12:35:10 -0400
parents 537c6763c018
children f8dfdb47508b
line wrap: on
line diff
--- a/feature_selection.xml	Fri Jul 13 03:55:31 2018 -0400
+++ b/feature_selection.xml	Sat Aug 04 12:35:10 2018 -0400
@@ -19,19 +19,28 @@
 import json
 import pandas
 import pickle
+import ast
 import numpy as np
+import xgboost
 import sklearn.feature_selection
-from sklearn import svm, linear_model, ensemble
+from sklearn import svm, linear_model, ensemble, naive_bayes, tree, neighbors
 
 @COLUMNS_FUNCTION@
-
+@GET_ESTIMATOR_FUNCTION@
 @FEATURE_SELECTOR_FUNCTION@
 
 input_json_path = sys.argv[1]
 with open(input_json_path, "r") as param_handler:
     params = json.load(param_handler)
 
-## Read features
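+# handle cheetah: for a prefitted SelectFromModel, overwrite the JSON value with the Cheetah-rendered fitted_estimator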
+#if $fs_algorithm_selector.selected_algorithm == "SelectFromModel"\
+        and $fs_algorithm_selector.model_inputter.input_mode == "prefitted":
+params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\
+        "$fs_algorithm_selector.model_inputter.fitted_estimator"
+#end if
+
+# Read features
 features_has_header = params["input_options"]["header1"]
 input_type = params["input_options"]["selected_input"]
 if input_type=="tabular":
@@ -53,7 +62,7 @@
 else:
     X = mmread("$input_options.infile1")
 
-## Read labels
+# Read labels
 header = 'infer' if params["input_options"]["header2"] else None
 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
@@ -70,21 +79,20 @@
 )
 y=y.ravel()
 
-## Create feature selector
-new_selector = feature_selector(params['feature_selection_algorithms'])
-if params['feature_selection_algorithms']['selected_algorithm'] != 'SelectFromModel' or \
-        'extra_estimator' not in params['feature_selection_algorithms'] or \
-        params['feature_selection_algorithms']['extra_estimator']['has_estimator'] != 'no_load' :
+# Create feature selector
+new_selector = feature_selector(params['fs_algorithm_selector'])
+if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\
+        or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' :
     new_selector.fit(X, y)
 
 ## Transform to select features
 selected_names = None
-if "$select_methods.selected_method" == "fit_transform":
+if "$output_method_selector.selected_method" == "fit_transform":
     res = new_selector.transform(X)
     if features_has_header:
         selected_names = input_df.columns[new_selector.get_support(indices=True)]
 else:
-    res = new_selector.get_support(params["select_methods"]["indices"])
+    res = new_selector.get_support(params["output_method_selector"]["indices"])
 
 res = pandas.DataFrame(res, columns = selected_names)
 res.to_csv(path_or_buf="$outfile", sep='\t', index=False)
@@ -94,8 +102,10 @@
         </configfile>
     </configfiles>
     <inputs>
-        <expand macro="feature_selection_all" />
-        <expand macro="feature_selection_methods" />
+        <expand macro="feature_selection_all">
+            <expand macro="fs_selectfrommodel_prefitted"/>
+        </expand>
+        <expand macro="feature_selection_output_mothods" />
         <expand macro="sl_mixed_input"/>
     </inputs>
     <outputs>
@@ -104,14 +114,16 @@
     <tests>
         <test>
             <param name="selected_algorithm" value="SelectFromModel"/>
-            <param name="has_estimator" value="no"/>
-            <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/>
-            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
-            <param name="header1" value="True"/>
-            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
-            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
-            <param name="col2" value="1"/>
-            <param name="header2" value="True"/>
+            <param name="input_mode" value="new"/>
+            <param name="selected_module" value="ensemble"/>
+            <param name="selected_estimator" value="RandomForestRegressor"/>
+            <param name="text_params" value="'n_estimators': 10, 'random_state': 10"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
+            <param name="header1" value="false"/>
+            <param name="col1" value="1,2,3,4,5"/>
+            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col2" value="6"/>
+            <param name="header2" value="false"/>
             <output name="outfile" file="feature_selection_result01"/>
         </test>
         <test>
@@ -180,26 +192,30 @@
         </test>
         <test>
             <param name="selected_algorithm" value="RFE"/>
-            <param name="has_estimator" value="no"/>
-            <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/>
-            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
-            <param name="header1" value="True"/>
-            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
-            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
-            <param name="col2" value="1"/>
-            <param name="header2" value="True"/>
+            <param name="input_mode" value="new"/>
+            <param name="selected_module" value="ensemble"/>
+            <param name="selected_estimator" value="RandomForestRegressor"/>
+            <param name="text_params" value="'n_estimators': 10, 'random_state':10"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
+            <param name="header1" value="false"/>
+            <param name="col1" value="1,2,3,4,5"/>
+            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col2" value="6"/>
+            <param name="header2" value="false"/>
             <output name="outfile" file="feature_selection_result08"/>
         </test>
         <test>
             <param name="selected_algorithm" value="RFECV"/>
-            <param name="has_estimator" value="no"/>
-            <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/>
-            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
-            <param name="header1" value="True"/>
-            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
-            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
-            <param name="col2" value="1"/>
-            <param name="header2" value="True"/>
+            <param name="input_mode" value="new"/>
+            <param name="selected_module" value="ensemble"/>
+            <param name="selected_estimator" value="RandomForestRegressor"/>
+            <param name="text_params" value="'n_estimators': 10, 'random_state':10"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
+            <param name="header1" value="false"/>
+            <param name="col1" value="1,2,3,4,5"/>
+            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col2" value="6"/>
+            <param name="header2" value="false"/>
             <output name="outfile" file="feature_selection_result09"/>
         </test>
         <test>
@@ -226,6 +242,18 @@
             <param name="col2" value="target"/>
             <output name="outfile" file="feature_selection_result11"/>
         </test>
+        <test>
+            <param name="selected_algorithm" value="SelectFromModel"/>
+            <param name="input_mode" value="prefitted"/>
+            <param name="fitted_estimator" value="rfr_model01" ftype="zip"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
+            <param name="header1" value="false"/>
+            <param name="col1" value="1,2,3,4,5"/>
+            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col2" value="1"/>
+            <param name="header2" value="false"/>
+            <output name="outfile" file="feature_selection_result12"/>
+        </test>
     </tests>
     <help>
         <![CDATA[