comparison pipeline.xml @ 0:73535ddcfa69 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 76583c1fcd9d06a4679cc46ffaee44117b9e22cd
author bgruening
date Sat, 04 Aug 2018 12:32:40 -0400
parents
children 0857964e07c2
comparison
equal deleted inserted replaced
-1:000000000000 0:73535ddcfa69
1 <tool id="sklearn_build_pipeline" name="Pipeline Builder" version="@VERSION@">
2 <description>constructs a list of transforms and a final estimator</description>
3 <macros>
4 <import>main_macros.xml</import>
5 </macros>
6 <expand macro="python_requirements"/>
7 <expand macro="macro_stdio"/>
8 <version_command>echo "@VERSION@"</version_command>
9 <command>
10 <![CDATA[
11 python "$sklearn_pipeline_script" '$inputs'
12 ]]>
13 </command>
14 <configfiles>
15 <inputs name="inputs" />
16 <configfile name="sklearn_pipeline_script">
17 <![CDATA[
18 import sys
19 import json
20 import pickle
21 import pprint
22 import xgboost
23 import ast
24 import sklearn.feature_selection
25 from sklearn import ( preprocessing, svm, linear_model, ensemble, naive_bayes,
26 tree, neighbors, decomposition, kernel_approximation, cluster)
27 from sklearn.pipeline import Pipeline
28
29 @GET_ESTIMATOR_FUNCTION@
30 @FEATURE_SELECTOR_FUNCTION@
31
# The only command-line argument is the path of the JSON file that holds
# the tool parameters.
input_json_path = sys.argv[1]
with open(input_json_path, "r") as json_file:
    params = json.load(json_file)

# Ordered (name, object) pairs; filled in below and handed to Pipeline.
pipeline_steps = list()
37
def _configured_instance(module, selector_json):
    """Instantiate the class named by ``selector_json['select_algorithm']``.

    The class is looked up on ``module`` and created with its default
    arguments. If the free-text ``text_params`` field is non-empty after
    stripping, it is parsed as the inside of a dict literal
    (for example ``'n_components': 10``) and applied through ``set_params``.
    """
    obj = getattr(module, selector_json['select_algorithm'])()
    text_params = selector_json['text_params'].strip()
    if text_params:
        # literal_eval only accepts Python literals, so the user-supplied
        # text cannot run arbitrary code.
        obj.set_params(**ast.literal_eval('{' + text_params + '}'))
    return obj


def get_component(input_json, check_none=False):
    """Build the transformer described by one ``component_selector`` entry.

    Parameters
    ----------
    input_json : dict
        The ``component_selector`` conditional of one pipeline component.
        Its ``component_type`` key picks the branch; the other keys that
        are read depend on that type.
    check_none : bool, default False
        When true, a "None" component type aborts the script instead of
        being skipped. The caller sets it when several components are
        configured.

    Returns
    -------
    object or None
        The instantiated transformer, or ``None`` when the component type
        is "None" (and ``check_none`` is false) or is not recognised.

    Raises
    ------
    SystemExit
        If the component type is "None" and ``check_none`` is true.
    """
    component_type = input_json['component_type']

    # The select option of the tool form carries the value 'none', while
    # this check used to compare against 'None' only, so the guard never
    # fired for form submissions. Accept both spellings.
    if component_type in ('None', 'none'):
        if check_none:
            sys.exit("The pre-processing component type can't be None when the number of components is greater than 1.")
        return None

    if component_type == 'pre_processor':
        selection = input_json["pre_processors"]
        transformer_class = getattr(preprocessing, selection["selected_pre_processor"])
        return transformer_class(**selection["options"])

    if component_type == 'feature_selection':
        return feature_selector(input_json['fs_algorithm_selector'])

    # The remaining types share one recipe: default-construct the chosen
    # class, then apply the optional free-text parameters.
    if component_type == 'decomposition':
        return _configured_instance(decomposition, input_json['matrix_decomposition_selector'])

    if component_type == 'kernel_approximation':
        return _configured_instance(kernel_approximation, input_json['kernel_approximation_selector'])

    if component_type == 'FeatureAgglomeration':
        return _configured_instance(cluster, input_json['FeatureAgglomeration_selector'])

    # Unrecognised types keep the historical behaviour of yielding None.
    return None
# Turn every configured pre-processing entry into a named pipeline step.
if len(params['pipeline_component']) == 1:
    # A single entry may yield no transformer at all; the pipeline then
    # consists of the final estimator only.
    step_obj = get_component(params['pipeline_component'][0]['component_selector'])
    if step_obj:
        pipeline_steps.append(('preprocessing_1', step_obj))
else:
    for position, component in enumerate(params['pipeline_component'], start=1):
        step_obj = get_component(component['component_selector'], check_none=True)
        step_name = 'preprocessing_' + str(position)
        pipeline_steps.append((step_name, step_obj))

# The final estimator always closes the pipeline.
estimator_json = params["final_estimator"]['estimator_selector']
estimator = get_estimator(estimator_json)
pipeline_steps.append(('estimator', estimator))

pipeline = Pipeline(pipeline_steps)
pprint.pprint(pipeline.named_steps)

# Pickle the assembled pipeline into the tool's output file.
with open("$outfile", 'wb') as out_handler:
    pickle.dump(pipeline, out_handler, pickle.HIGHEST_PROTOCOL)
96
97 ]]>
98 </configfile>
99 </configfiles>
100 <inputs>
101 <repeat name="pipeline_component" min="1" max="5" title="Pre-processing step">
102 <conditional name="component_selector">
103 <param name="component_type" type="select" label="Choose the type of transformation:">
104 <option value="none" selected="true">None</option>
105 <option value="pre_processor">Sklearn Preprocessor</option>
106 <option value="feature_selection">Feature Selection</option>
107 <option value="decomposition">Matrix Decomposition</option>
108 <option value="kernel_approximation">Kernel Approximation</option>
109 <option value="FeatureAgglomeration">Agglomerate Features</option>
110 </param>
111 <when value="none"/>
112 <when value="pre_processor">
113 <conditional name="pre_processors">
114 <expand macro="sparse_preprocessors_ext" />
115 <expand macro="sparse_preprocessor_options_ext" />
116 </conditional>
117 </when>
118 <when value="feature_selection">
119 <expand macro="feature_selection_all">
120 <expand macro="fs_selectfrommodel_no_prefitted"/>
121 </expand>
122 </when>
123 <when value="decomposition">
124 <expand macro="matrix_decomposition_all"/>
125 </when>
126 <when value="kernel_approximation">
127 <expand macro="kernel_approximation_all"/>
128 </when>
129 <when value="FeatureAgglomeration">
130 <expand macro="FeatureAgglomeration"/>
131 </when>
132 </conditional>
133 </repeat>
134 <section name="final_estimator" title="Final Estimator" expanded="true">
135 <expand macro="estimator_selector_all" />
136 </section>
137 </inputs>
138 <outputs>
139 <data format="zip" name="outfile"/>
140 </outputs>
141 <tests>
142 <test>
143 <repeat name="pipeline_component">
144 <conditional name="component_selector">
145 <param name="component_type" value="pre_processor"/>
146 <conditional name="pre_processors">
147 <param name="selected_pre_processor" value="RobustScaler"/>
148 </conditional>
149 </conditional>
150 </repeat>
151 <repeat name="pipeline_component">
152 <conditional name="component_selector">
153 <param name="component_type" value="feature_selection"/>
154 <conditional name="fs_algorithm_selector">
155 <param name="selected_algorithm" value="SelectKBest"/>
156 <param name="score_func" value="f_classif"/>
157 </conditional>
158 </conditional>
159 </repeat>
160 <param name="selected_module" value="svm"/>
161 <param name="selected_estimator" value="SVR"/>
162 <param name="text_params" value="'kernel': 'linear'"/>
163 <output name="outfile" file="pipeline01" compare="sim_size" delta="1"/>
164 </test>
165 <test>
166 <conditional name="component_selector">
167 <param name="component_type" value="pre_processor"/>
168 <conditional name="pre_processors">
169 <param name="selected_pre_processor" value="RobustScaler"/>
170 </conditional>
171 </conditional>
172 <param name="selected_module" value="linear_model"/>
173 <param name="selected_estimator" value="LassoCV"/>
174 <output name="outfile" file="pipeline02" compare="sim_size" delta="1"/>
175 </test>
176 <test>
177 <conditional name="component_selector">
178 <param name="component_type" value="pre_processor"/>
179 <conditional name="pre_processors">
180 <param name="selected_pre_processor" value="RobustScaler"/>
181 </conditional>
182 </conditional>
183 <param name="selected_module" value="xgboost"/>
184 <param name="selected_estimator" value="XGBClassifier"/>
185 <output name="outfile" file="pipeline03" compare="sim_size" delta="1"/>
186 </test>
187 <test>
188 <conditional name="component_selector">
189 <param name="component_type" value="feature_selection"/>
190 <conditional name="fs_algorithm_selector">
191 <param name="selected_algorithm" value="SelectFromModel"/>
192 <conditional name="model_inputter">
193 <conditional name="estimator_selector">
194 <param name="selected_module" value="ensemble"/>
195 <param name="selected_estimator" value="AdaBoostClassifier"/>
196 </conditional>
197 </conditional>
198 </conditional>
199 </conditional>
200 <section name="final_estimator">
201 <param name="selected_module" value="svm"/>
202 <param name="selected_estimator" value="LinearSVC"/>
203 </section>
204 <output name="outfile" file="pipeline04" compare="sim_size" delta="1"/>
205 </test>
206 <test>
207 <conditional name="component_selector">
208 <param name="component_type" value="None"/>
209 </conditional>
210 <param name="selected_module" value="ensemble"/>
211 <param name="selected_estimator" value="RandomForestRegressor"/>
212 <param name="text_params" value="'n_estimators': 100, 'random_state': 42"/>
213 <output name="outfile" file="pipeline05" compare="sim_size" delta="1"/>
214 </test>
215 <test>
216 <conditional name="component_selector">
217 <param name="component_type" value="decomposition"/>
218 <conditional name="matrix_decomposition_selector">
219 <param name="select_algorithm" value="PCA"/>
220 </conditional>
221 </conditional>
222 <param name="selected_module" value="ensemble"/>
223 <param name="selected_estimator" value="AdaBoostRegressor"/>
224 <output name="outfile" file="pipeline06" compare="sim_size" delta="1"/>
225 </test>
226 <test>
227 <conditional name="component_selector">
228 <param name="component_type" value="kernel_approximation"/>
229 <conditional name="kernel_approximation_selector">
230 <param name="select_algorithm" value="RBFSampler"/>
231 <param name="text_params" value="'n_components': 10, 'gamma': 2.0"/>
232 </conditional>
233 </conditional>
234 <param name="selected_module" value="ensemble"/>
235 <param name="selected_estimator" value="AdaBoostClassifier"/>
236 <output name="outfile" file="pipeline07" compare="sim_size" delta="1"/>
237 </test>
238 <test>
239 <conditional name="component_selector">
240 <param name="component_type" value="FeatureAgglomeration"/>
241 <conditional name="FeatureAgglomeration_selector">
242 <param name="select_algorithm" value="FeatureAgglomeration"/>
243 <param name="text_params" value="'n_clusters': 3, 'affinity': 'euclidean'"/>
244 </conditional>
245 </conditional>
246 <param name="selected_module" value="ensemble"/>
247 <param name="selected_estimator" value="AdaBoostClassifier"/>
248 <output name="outfile" file="pipeline08" compare="sim_size" delta="1"/>
249 </test>
250 </tests>
251 <help>
252 <![CDATA[
253 **What it does**
254 Constructs a pipeline that contains a list of transforms and a final estimator. A pipeline assembles several steps
255 that can be cross-validated together while setting different parameters.
256 For details, please refer to `Scikit-learn pipeline Pipeline`_.
257
258 **Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_ and/or `cluster.FeatureAgglomeration`_.
259
260 **Estimator** selector supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_ and `neighbors`_.
261
262
263 .. _`Scikit-learn pipeline Pipeline`: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
264 .. _`svm`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.svm
265 .. _`linear_model`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.linear_model
266 .. _`ensemble`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.ensemble
267 .. _`naive_bayes`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.naive_bayes
268 .. _`tree`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.tree
269 .. _`neighbors`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.neighbors
270 .. _`xgboost`: https://xgboost.readthedocs.io/en/latest/python/python_api.html
271
272 .. _`sklearn.preprocessing`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing
273 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection
274 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition
275 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation
276 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html
277
278 ]]>
279 </help>
280 <expand macro="sklearn_citation"/>
281 </tool>