comparison feature_selection.xml @ 10:96f9b73327f2 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 76583c1fcd9d06a4679cc46ffaee44117b9e22cd
author bgruening
date Sat, 04 Aug 2018 12:35:10 -0400
parents 537c6763c018
children f8dfdb47508b
comparison
equal deleted inserted replaced
9:537c6763c018 10:96f9b73327f2
17 <![CDATA[ 17 <![CDATA[
18 import sys 18 import sys
19 import json 19 import json
20 import pandas 20 import pandas
21 import pickle 21 import pickle
22 import ast
22 import numpy as np 23 import numpy as np
24 import xgboost
23 import sklearn.feature_selection 25 import sklearn.feature_selection
24 from sklearn import svm, linear_model, ensemble 26 from sklearn import svm, linear_model, ensemble, naive_bayes, tree, neighbors
25 27
26 @COLUMNS_FUNCTION@ 28 @COLUMNS_FUNCTION@
27 29 @GET_ESTIMATOR_FUNCTION@
28 @FEATURE_SELECTOR_FUNCTION@ 30 @FEATURE_SELECTOR_FUNCTION@
29 31
30 input_json_path = sys.argv[1] 32 input_json_path = sys.argv[1]
31 with open(input_json_path, "r") as param_handler: 33 with open(input_json_path, "r") as param_handler:
32 params = json.load(param_handler) 34 params = json.load(param_handler)
33 35
34 ## Read features 36 #handle cheetah
37 #if $fs_algorithm_selector.selected_algorithm == "SelectFromModel"\
38 and $fs_algorithm_selector.model_inputter.input_mode == "prefitted":
39 params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\
40 "$fs_algorithm_selector.model_inputter.fitted_estimator"
41 #end if
42
43 # Read features
35 features_has_header = params["input_options"]["header1"] 44 features_has_header = params["input_options"]["header1"]
36 input_type = params["input_options"]["selected_input"] 45 input_type = params["input_options"]["selected_input"]
37 if input_type=="tabular": 46 if input_type=="tabular":
38 header = 'infer' if features_has_header else None 47 header = 'infer' if features_has_header else None
39 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] 48 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
51 parse_dates=True 60 parse_dates=True
52 ) 61 )
53 else: 62 else:
54 X = mmread("$input_options.infile1") 63 X = mmread("$input_options.infile1")
55 64
56 ## Read labels 65 # Read labels
57 header = 'infer' if params["input_options"]["header2"] else None 66 header = 'infer' if params["input_options"]["header2"] else None
58 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] 67 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
59 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: 68 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
60 c = params["input_options"]["column_selector_options_2"]["col2"] 69 c = params["input_options"]["column_selector_options_2"]["col2"]
61 else: 70 else:
68 header=header, 77 header=header,
69 parse_dates=True 78 parse_dates=True
70 ) 79 )
71 y=y.ravel() 80 y=y.ravel()
72 81
73 ## Create feature selector 82 # Create feature selector
74 new_selector = feature_selector(params['feature_selection_algorithms']) 83 new_selector = feature_selector(params['fs_algorithm_selector'])
75 if params['feature_selection_algorithms']['selected_algorithm'] != 'SelectFromModel' or \ 84 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\
76 'extra_estimator' not in params['feature_selection_algorithms'] or \ 85 or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' :
77 params['feature_selection_algorithms']['extra_estimator']['has_estimator'] != 'no_load' :
78 new_selector.fit(X, y) 86 new_selector.fit(X, y)
79 87
80 ## Transform to select features 88 ## Transform to select features
81 selected_names = None 89 selected_names = None
82 if "$select_methods.selected_method" == "fit_transform": 90 if "$output_method_selector.selected_method" == "fit_transform":
83 res = new_selector.transform(X) 91 res = new_selector.transform(X)
84 if features_has_header: 92 if features_has_header:
85 selected_names = input_df.columns[new_selector.get_support(indices=True)] 93 selected_names = input_df.columns[new_selector.get_support(indices=True)]
86 else: 94 else:
87 res = new_selector.get_support(params["select_methods"]["indices"]) 95 res = new_selector.get_support(params["output_method_selector"]["indices"])
88 96
89 res = pandas.DataFrame(res, columns = selected_names) 97 res = pandas.DataFrame(res, columns = selected_names)
90 res.to_csv(path_or_buf="$outfile", sep='\t', index=False) 98 res.to_csv(path_or_buf="$outfile", sep='\t', index=False)
91 99
92 100
93 ]]> 101 ]]>
94 </configfile> 102 </configfile>
95 </configfiles> 103 </configfiles>
96 <inputs> 104 <inputs>
97 <expand macro="feature_selection_all" /> 105 <expand macro="feature_selection_all">
98 <expand macro="feature_selection_methods" /> 106 <expand macro="fs_selectfrommodel_prefitted"/>
107 </expand>
108 <expand macro="feature_selection_output_mothods" />
99 <expand macro="sl_mixed_input"/> 109 <expand macro="sl_mixed_input"/>
100 </inputs> 110 </inputs>
101 <outputs> 111 <outputs>
102 <data format="tabular" name="outfile"/> 112 <data format="tabular" name="outfile"/>
103 </outputs> 113 </outputs>
104 <tests> 114 <tests>
105 <test> 115 <test>
106 <param name="selected_algorithm" value="SelectFromModel"/> 116 <param name="selected_algorithm" value="SelectFromModel"/>
107 <param name="has_estimator" value="no"/> 117 <param name="input_mode" value="new"/>
108 <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/> 118 <param name="selected_module" value="ensemble"/>
109 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 119 <param name="selected_estimator" value="RandomForestRegressor"/>
110 <param name="header1" value="True"/> 120 <param name="text_params" value="'n_estimators': 10, 'random_state': 10"/>
111 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> 121 <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
112 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 122 <param name="header1" value="false"/>
113 <param name="col2" value="1"/> 123 <param name="col1" value="1,2,3,4,5"/>
114 <param name="header2" value="True"/> 124 <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
125 <param name="col2" value="6"/>
126 <param name="header2" value="false"/>
115 <output name="outfile" file="feature_selection_result01"/> 127 <output name="outfile" file="feature_selection_result01"/>
116 </test> 128 </test>
117 <test> 129 <test>
118 <param name="selected_algorithm" value="GenericUnivariateSelect"/> 130 <param name="selected_algorithm" value="GenericUnivariateSelect"/>
119 <param name="param" value="20"/> 131 <param name="param" value="20"/>
178 <param name="header2" value="True"/> 190 <param name="header2" value="True"/>
179 <output name="outfile" file="feature_selection_result07"/> 191 <output name="outfile" file="feature_selection_result07"/>
180 </test> 192 </test>
181 <test> 193 <test>
182 <param name="selected_algorithm" value="RFE"/> 194 <param name="selected_algorithm" value="RFE"/>
183 <param name="has_estimator" value="no"/> 195 <param name="input_mode" value="new"/>
184 <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/> 196 <param name="selected_module" value="ensemble"/>
185 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 197 <param name="selected_estimator" value="RandomForestRegressor"/>
186 <param name="header1" value="True"/> 198 <param name="text_params" value="'n_estimators': 10, 'random_state':10"/>
187 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> 199 <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
188 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 200 <param name="header1" value="false"/>
189 <param name="col2" value="1"/> 201 <param name="col1" value="1,2,3,4,5"/>
190 <param name="header2" value="True"/> 202 <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
203 <param name="col2" value="6"/>
204 <param name="header2" value="false"/>
191 <output name="outfile" file="feature_selection_result08"/> 205 <output name="outfile" file="feature_selection_result08"/>
192 </test> 206 </test>
193 <test> 207 <test>
194 <param name="selected_algorithm" value="RFECV"/> 208 <param name="selected_algorithm" value="RFECV"/>
195 <param name="has_estimator" value="no"/> 209 <param name="input_mode" value="new"/>
196 <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/> 210 <param name="selected_module" value="ensemble"/>
197 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 211 <param name="selected_estimator" value="RandomForestRegressor"/>
198 <param name="header1" value="True"/> 212 <param name="text_params" value="'n_estimators': 10, 'random_state':10"/>
199 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> 213 <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
200 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 214 <param name="header1" value="false"/>
201 <param name="col2" value="1"/> 215 <param name="col1" value="1,2,3,4,5"/>
202 <param name="header2" value="True"/> 216 <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
217 <param name="col2" value="6"/>
218 <param name="header2" value="false"/>
203 <output name="outfile" file="feature_selection_result09"/> 219 <output name="outfile" file="feature_selection_result09"/>
204 </test> 220 </test>
205 <test> 221 <test>
206 <param name="selected_algorithm" value="VarianceThreshold"/> 222 <param name="selected_algorithm" value="VarianceThreshold"/>
207 <param name="threshold" value="0.1"/> 223 <param name="threshold" value="0.1"/>
223 <param name="infile2" value="test3.tabular" ftype="tabular"/> 239 <param name="infile2" value="test3.tabular" ftype="tabular"/>
224 <param name="header2" value="True"/> 240 <param name="header2" value="True"/>
225 <param name="selected_column_selector_option2" value="by_header_name"/> 241 <param name="selected_column_selector_option2" value="by_header_name"/>
226 <param name="col2" value="target"/> 242 <param name="col2" value="target"/>
227 <output name="outfile" file="feature_selection_result11"/> 243 <output name="outfile" file="feature_selection_result11"/>
244 </test>
245 <test>
246 <param name="selected_algorithm" value="SelectFromModel"/>
247 <param name="input_mode" value="prefitted"/>
248 <param name="fitted_estimator" value="rfr_model01" ftype="zip"/>
249 <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
250 <param name="header1" value="false"/>
251 <param name="col1" value="1,2,3,4,5"/>
252 <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
253 <param name="col2" value="1"/>
254 <param name="header2" value="false"/>
255 <output name="outfile" file="feature_selection_result12"/>
228 </test> 256 </test>
229 </tests> 257 </tests>
230 <help> 258 <help>
231 <![CDATA[ 259 <![CDATA[
232 **What it does** 260 **What it does**