comparison pipeline.xml @ 3:0857964e07c2 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
author bgruening
date Fri, 17 Aug 2018 12:26:40 -0400
parents 73535ddcfa69
children 86450dde8682
comparison
equal deleted inserted replaced
2:9ddacd0b8c8b 3:0857964e07c2
1 <tool id="sklearn_build_pipeline" name="Pipeline Builder" version="@VERSION@"> 1 <tool id="sklearn_build_pipeline" name="Pipeline Builder" version="@VERSION@">
2 <description>constructs a list of transforms and a final estimator</description> 2 <description>constructs a list of transforms and a final estimator</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements"/> 6 <expand macro="python_requirements">
7 <requirement type="package" version="0.6">skrebate</requirement>
8 </expand>
7 <expand macro="macro_stdio"/> 9 <expand macro="macro_stdio"/>
8 <version_command>echo "@VERSION@"</version_command> 10 <version_command>echo "@VERSION@"</version_command>
9 <command> 11 <command>
10 <![CDATA[ 12 <![CDATA[
11 python "$sklearn_pipeline_script" '$inputs' 13 python "$sklearn_pipeline_script" '$inputs'
14 <configfiles> 16 <configfiles>
15 <inputs name="inputs" /> 17 <inputs name="inputs" />
16 <configfile name="sklearn_pipeline_script"> 18 <configfile name="sklearn_pipeline_script">
17 <![CDATA[ 19 <![CDATA[
18 import sys 20 import sys
21 import os
19 import json 22 import json
20 import pickle 23 import pickle
21 import pprint 24 import pprint
22 import xgboost 25 import skrebate
23 import ast 26 from sklearn import (preprocessing, svm, linear_model, ensemble, naive_bayes,
24 import sklearn.feature_selection
25 from sklearn import ( preprocessing, svm, linear_model, ensemble, naive_bayes,
26 tree, neighbors, decomposition, kernel_approximation, cluster) 27 tree, neighbors, decomposition, kernel_approximation, cluster)
27 from sklearn.pipeline import Pipeline 28 from sklearn.pipeline import Pipeline
28 29
29 @GET_ESTIMATOR_FUNCTION@ 30 execfile("$__tool_directory__/utils.py")
30 @FEATURE_SELECTOR_FUNCTION@ 31
32 safe_eval = SafeEval()
31 33
32 input_json_path = sys.argv[1] 34 input_json_path = sys.argv[1]
33 with open(input_json_path, "r") as param_handler: 35 with open(input_json_path, "r") as param_handler:
34 params = json.load(param_handler) 36 params = json.load(param_handler)
35 37
43 sys.exit("The pre-processing component type can't be None when the number of components is greater than 1.") 45 sys.exit("The pre-processing component type can't be None when the number of components is greater than 1.")
44 if input_json['component_type'] == 'pre_processor': 46 if input_json['component_type'] == 'pre_processor':
45 preprocessor = input_json["pre_processors"]["selected_pre_processor"] 47 preprocessor = input_json["pre_processors"]["selected_pre_processor"]
46 pre_processor_options = input_json["pre_processors"]["options"] 48 pre_processor_options = input_json["pre_processors"]["options"]
47 my_class = getattr(preprocessing, preprocessor) 49 my_class = getattr(preprocessing, preprocessor)
48 return my_class(**pre_processor_options) 50 obj = my_class(**pre_processor_options)
49 if input_json['component_type'] == 'feature_selection': 51 elif input_json['component_type'] == 'feature_selection':
50 fs_obj = feature_selector(input_json['fs_algorithm_selector']) 52 obj = feature_selector(input_json['fs_algorithm_selector'])
51 return fs_obj 53 elif input_json['component_type'] == 'decomposition':
52 if input_json['component_type'] == 'decomposition':
53 algorithm = input_json['matrix_decomposition_selector']['select_algorithm'] 54 algorithm = input_json['matrix_decomposition_selector']['select_algorithm']
54 obj = getattr(decomposition, algorithm)() 55 obj = getattr(decomposition, algorithm)()
55 options = input_json['matrix_decomposition_selector']['text_params'].strip() 56 options = input_json['matrix_decomposition_selector']['text_params'].strip()
56 if options != "": 57 if options != "":
57 options = ast.literal_eval('{' + options + '}') 58 options = safe_eval('dict(' + options + ')')
58 obj.set_params(**options) 59 obj.set_params(**options)
59 return obj 60 elif input_json['component_type'] == 'kernel_approximation':
60 if input_json['component_type'] == 'kernel_approximation':
61 algorithm = input_json['kernel_approximation_selector']['select_algorithm'] 61 algorithm = input_json['kernel_approximation_selector']['select_algorithm']
62 obj = getattr(kernel_approximation, algorithm)() 62 obj = getattr(kernel_approximation, algorithm)()
63 options = input_json['kernel_approximation_selector']['text_params'].strip() 63 options = input_json['kernel_approximation_selector']['text_params'].strip()
64 if options != "": 64 if options != "":
65 options = ast.literal_eval('{' + options + '}') 65 options = safe_eval('dict(' + options + ')')
66 obj.set_params(**options) 66 obj.set_params(**options)
67 return obj 67 elif input_json['component_type'] == 'FeatureAgglomeration':
68 if input_json['component_type'] == 'FeatureAgglomeration':
69 algorithm = input_json['FeatureAgglomeration_selector']['select_algorithm'] 68 algorithm = input_json['FeatureAgglomeration_selector']['select_algorithm']
70 obj = getattr(cluster, algorithm)() 69 obj = getattr(cluster, algorithm)()
71 options = input_json['FeatureAgglomeration_selector']['text_params'].strip() 70 options = input_json['FeatureAgglomeration_selector']['text_params'].strip()
72 if options != "": 71 if options != "":
73 options = ast.literal_eval('{' + options + '}') 72 options = safe_eval('dict(' + options + ')')
74 obj.set_params(**options) 73 obj.set_params(**options)
75 return obj 74 elif input_json['component_type'] == 'skrebate':
75 algorithm = input_json['skrebate_selector']['select_algorithm']
76 if algorithm == 'TuRF':
77 obj = getattr(skrebate, algorithm)(core_algorithm='ReliefF')
78 else:
79 obj = getattr(skrebate, algorithm)()
80 options = input_json['skrebate_selector']['text_params'].strip()
81 if options != "":
82 options = safe_eval('dict(' + options + ')')
83 obj.set_params(**options)
84 if 'n_jobs' in obj.get_params():
85 obj.set_params( n_jobs=N_JOBS )
86 return obj
87
76 if len(params['pipeline_component']) == 1: 88 if len(params['pipeline_component']) == 1:
77 step_obj = get_component( params['pipeline_component'][0]['component_selector']) 89 step_obj = get_component( params['pipeline_component'][0]['component_selector'])
78 if step_obj: 90 if step_obj:
79 pipeline_steps.append( ('preprocessing_1', step_obj) ) 91 pipeline_steps.append( ('preprocessing_1', step_obj) )
80 else: 92 else:
99 </configfiles> 111 </configfiles>
100 <inputs> 112 <inputs>
101 <repeat name="pipeline_component" min="1" max="5" title="Pre-processing step"> 113 <repeat name="pipeline_component" min="1" max="5" title="Pre-processing step">
102 <conditional name="component_selector"> 114 <conditional name="component_selector">
103 <param name="component_type" type="select" label="Choose the type of transformation:"> 115 <param name="component_type" type="select" label="Choose the type of transformation:">
104 <option value="none" selected="true">None</option> 116 <option value="None" selected="true">None</option>
105 <option value="pre_processor">Sklearn Preprocessor</option> 117 <option value="pre_processor">Sklearn Preprocessor</option>
106 <option value="feature_selection">Feature Selection</option> 118 <option value="feature_selection">Feature Selection</option>
107 <option value="decomposition">Matrix Decomposition</option> 119 <option value="decomposition">Matrix Decomposition</option>
108 <option value="kernel_approximation">Kernel Approximation</option> 120 <option value="kernel_approximation">Kernel Approximation</option>
109 <option value="FeatureAgglomeration">Agglomerate Features</option> 121 <option value="FeatureAgglomeration">Agglomerate Features</option>
122 <option value="skrebate">Skrebate algorithm</option>
110 </param> 123 </param>
111 <when value="none"/> 124 <when value="None"/>
112 <when value="pre_processor"> 125 <when value="pre_processor">
113 <conditional name="pre_processors"> 126 <conditional name="pre_processors">
114 <expand macro="sparse_preprocessors_ext" /> 127 <expand macro="sparse_preprocessors_ext" />
115 <expand macro="sparse_preprocessor_options_ext" /> 128 <expand macro="sparse_preprocessor_options_ext" />
116 </conditional> 129 </conditional>
126 <when value="kernel_approximation"> 139 <when value="kernel_approximation">
127 <expand macro="kernel_approximation_all"/> 140 <expand macro="kernel_approximation_all"/>
128 </when> 141 </when>
129 <when value="FeatureAgglomeration"> 142 <when value="FeatureAgglomeration">
130 <expand macro="FeatureAgglomeration"/> 143 <expand macro="FeatureAgglomeration"/>
144 </when>
145 <when value="skrebate">
146 <expand macro="skrebate"/>
131 </when> 147 </when>
132 </conditional> 148 </conditional>
133 </repeat> 149 </repeat>
134 <section name="final_estimator" title="Final Estimator" expanded="true"> 150 <section name="final_estimator" title="Final Estimator" expanded="true">
135 <expand macro="estimator_selector_all" /> 151 <expand macro="estimator_selector_all" />
157 </conditional> 173 </conditional>
158 </conditional> 174 </conditional>
159 </repeat> 175 </repeat>
160 <param name="selected_module" value="svm"/> 176 <param name="selected_module" value="svm"/>
161 <param name="selected_estimator" value="SVR"/> 177 <param name="selected_estimator" value="SVR"/>
162 <param name="text_params" value="'kernel': 'linear'"/> 178 <param name="text_params" value="kernel='linear'"/>
163 <output name="outfile" file="pipeline01" compare="sim_size" delta="1"/> 179 <output name="outfile" file="pipeline01" compare="sim_size" delta="1"/>
164 </test> 180 </test>
165 <test> 181 <test>
166 <conditional name="component_selector"> 182 <conditional name="component_selector">
167 <param name="component_type" value="pre_processor"/> 183 <param name="component_type" value="pre_processor"/>
207 <conditional name="component_selector"> 223 <conditional name="component_selector">
208 <param name="component_type" value="None"/> 224 <param name="component_type" value="None"/>
209 </conditional> 225 </conditional>
210 <param name="selected_module" value="ensemble"/> 226 <param name="selected_module" value="ensemble"/>
211 <param name="selected_estimator" value="RandomForestRegressor"/> 227 <param name="selected_estimator" value="RandomForestRegressor"/>
212 <param name="text_params" value="'n_estimators': 100, 'random_state': 42"/> 228 <param name="text_params" value="n_estimators=100, random_state=42"/>
213 <output name="outfile" file="pipeline05" compare="sim_size" delta="1"/> 229 <output name="outfile" file="pipeline05" compare="sim_size" delta="1"/>
214 </test> 230 </test>
215 <test> 231 <test>
216 <conditional name="component_selector"> 232 <conditional name="component_selector">
217 <param name="component_type" value="decomposition"/> 233 <param name="component_type" value="decomposition"/>
226 <test> 242 <test>
227 <conditional name="component_selector"> 243 <conditional name="component_selector">
228 <param name="component_type" value="kernel_approximation"/> 244 <param name="component_type" value="kernel_approximation"/>
229 <conditional name="kernel_approximation_selector"> 245 <conditional name="kernel_approximation_selector">
230 <param name="select_algorithm" value="RBFSampler"/> 246 <param name="select_algorithm" value="RBFSampler"/>
231 <param name="text_params" value="'n_components': 10, 'gamma': 2.0"/> 247 <param name="text_params" value="n_components=10, gamma=2.0"/>
232 </conditional> 248 </conditional>
233 </conditional> 249 </conditional>
234 <param name="selected_module" value="ensemble"/> 250 <param name="selected_module" value="ensemble"/>
235 <param name="selected_estimator" value="AdaBoostClassifier"/> 251 <param name="selected_estimator" value="AdaBoostClassifier"/>
236 <output name="outfile" file="pipeline07" compare="sim_size" delta="1"/> 252 <output name="outfile" file="pipeline07" compare="sim_size" delta="1"/>
238 <test> 254 <test>
239 <conditional name="component_selector"> 255 <conditional name="component_selector">
240 <param name="component_type" value="FeatureAgglomeration"/> 256 <param name="component_type" value="FeatureAgglomeration"/>
241 <conditional name="FeatureAgglomeration_selector"> 257 <conditional name="FeatureAgglomeration_selector">
242 <param name="select_algorithm" value="FeatureAgglomeration"/> 258 <param name="select_algorithm" value="FeatureAgglomeration"/>
243 <param name="text_params" value="'n_clusters': 3, 'affinity': 'euclidean'"/> 259 <param name="text_params" value="n_clusters=3, affinity='euclidean'"/>
244 </conditional> 260 </conditional>
245 </conditional> 261 </conditional>
246 <param name="selected_module" value="ensemble"/> 262 <param name="selected_module" value="ensemble"/>
247 <param name="selected_estimator" value="AdaBoostClassifier"/> 263 <param name="selected_estimator" value="AdaBoostClassifier"/>
248 <output name="outfile" file="pipeline08" compare="sim_size" delta="1"/> 264 <output name="outfile" file="pipeline08" compare="sim_size" delta="1"/>
265 </test>
266 <test>
267 <conditional name="component_selector">
268 <param name="component_type" value="skrebate"/>
269 <conditional name="skrebate_selector">
270 <param name="select_algorithm" value="ReliefF"/>
271 <param name="text_params" value="n_features_to_select=3, n_neighbors=100"/>
272 </conditional>
273 </conditional>
274 <param name="selected_module" value="ensemble"/>
275 <param name="selected_estimator" value="RandomForestRegressor"/>
276 <output name="outfile" file="pipeline09" compare="sim_size" delta="1"/>
277 </test>
278 <test>
279 <conditional name="component_selector">
280 <param name="component_type" value="skrebate"/>
281 <conditional name="skrebate_selector">
282 <param name="select_algorithm" value="TuRF"/>
283 <param name="text_params" value=""/>
284 </conditional>
285 </conditional>
286 <param name="selected_module" value="ensemble"/>
287 <param name="selected_estimator" value="RandomForestRegressor"/>
288 <output name="outfile" file="pipeline10" compare="sim_size" delta="1"/>
249 </test> 289 </test>
250 </tests> 290 </tests>
251 <help> 291 <help>
252 <![CDATA[ 292 <![CDATA[
253 **What it does** 293 **What it does**
254 Constructs a pipeline that contains a list of transforms and a final estimator. Pipeline assembles several steps 294 Constructs a pipeline that contains a list of transforms and a final estimator. Pipeline assembles several steps
255 that can be cross-validated together while setting different parameters. 295 that can be cross-validated together while setting different parameters.
256 Please refer to `Scikit-learn pipeline Pipeline`_. 296 Please refer to `Scikit-learn pipeline Pipeline`_.
257 297
258 **Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_ and/or `cluster.FeatureAgglomeration`_. 298 **Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_ and/or `skrebate`_.
259 299
260 **Estimator** selector supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_ and `neighbors`_. 300 **Estimator** selector supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_ and `neighbors`_.
261 301
262 302
263 .. _`Scikit-learn pipeline Pipeline`: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html 303 .. _`Scikit-learn pipeline Pipeline`: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
272 .. _`sklearn.preprocessing`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing 312 .. _`sklearn.preprocessing`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing
273 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection 313 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection
274 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition 314 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition
275 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation 315 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation
276 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html 316 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html
317 .. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/
277 318
278 ]]> 319 ]]>
279 </help> 320 </help>
280 <expand macro="sklearn_citation"/> 321 <expand macro="sklearn_citation"/>
281 </tool> 322 </tool>