Mercurial > repos > bgruening > sklearn_build_pipeline
comparison pipeline.xml @ 3:0857964e07c2 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
author | bgruening |
---|---|
date | Fri, 17 Aug 2018 12:26:40 -0400 |
parents | 73535ddcfa69 |
children | 86450dde8682 |
comparison
equal
deleted
inserted
replaced
2:9ddacd0b8c8b | 3:0857964e07c2 |
---|---|
1 <tool id="sklearn_build_pipeline" name="Pipeline Builder" version="@VERSION@"> | 1 <tool id="sklearn_build_pipeline" name="Pipeline Builder" version="@VERSION@"> |
2 <description>constructs a list of transforms and a final estimator</description> | 2 <description>constructs a list of transforms and a final estimator</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="python_requirements"/> | 6 <expand macro="python_requirements"> |
7 <requirement type="package" version="0.6">skrebate</requirement> | |
8 </expand> | |
7 <expand macro="macro_stdio"/> | 9 <expand macro="macro_stdio"/> |
8 <version_command>echo "@VERSION@"</version_command> | 10 <version_command>echo "@VERSION@"</version_command> |
9 <command> | 11 <command> |
10 <![CDATA[ | 12 <![CDATA[ |
11 python "$sklearn_pipeline_script" '$inputs' | 13 python "$sklearn_pipeline_script" '$inputs' |
14 <configfiles> | 16 <configfiles> |
15 <inputs name="inputs" /> | 17 <inputs name="inputs" /> |
16 <configfile name="sklearn_pipeline_script"> | 18 <configfile name="sklearn_pipeline_script"> |
17 <![CDATA[ | 19 <![CDATA[ |
18 import sys | 20 import sys |
21 import os | |
19 import json | 22 import json |
20 import pickle | 23 import pickle |
21 import pprint | 24 import pprint |
22 import xgboost | 25 import skrebate |
23 import ast | 26 from sklearn import (preprocessing, svm, linear_model, ensemble, naive_bayes, |
24 import sklearn.feature_selection | |
25 from sklearn import ( preprocessing, svm, linear_model, ensemble, naive_bayes, | |
26 tree, neighbors, decomposition, kernel_approximation, cluster) | 27 tree, neighbors, decomposition, kernel_approximation, cluster) |
27 from sklearn.pipeline import Pipeline | 28 from sklearn.pipeline import Pipeline |
28 | 29 |
29 @GET_ESTIMATOR_FUNCTION@ | 30 execfile("$__tool_directory__/utils.py") |
30 @FEATURE_SELECTOR_FUNCTION@ | 31 |
32 safe_eval = SafeEval() | |
31 | 33 |
32 input_json_path = sys.argv[1] | 34 input_json_path = sys.argv[1] |
33 with open(input_json_path, "r") as param_handler: | 35 with open(input_json_path, "r") as param_handler: |
34 params = json.load(param_handler) | 36 params = json.load(param_handler) |
35 | 37 |
43 sys.exit("The pre-processing component type can't be None when the number of components is greater than 1.") | 45 sys.exit("The pre-processing component type can't be None when the number of components is greater than 1.") |
44 if input_json['component_type'] == 'pre_processor': | 46 if input_json['component_type'] == 'pre_processor': |
45 preprocessor = input_json["pre_processors"]["selected_pre_processor"] | 47 preprocessor = input_json["pre_processors"]["selected_pre_processor"] |
46 pre_processor_options = input_json["pre_processors"]["options"] | 48 pre_processor_options = input_json["pre_processors"]["options"] |
47 my_class = getattr(preprocessing, preprocessor) | 49 my_class = getattr(preprocessing, preprocessor) |
48 return my_class(**pre_processor_options) | 50 obj = my_class(**pre_processor_options) |
49 if input_json['component_type'] == 'feature_selection': | 51 elif input_json['component_type'] == 'feature_selection': |
50 fs_obj = feature_selector(input_json['fs_algorithm_selector']) | 52 obj = feature_selector(input_json['fs_algorithm_selector']) |
51 return fs_obj | 53 elif input_json['component_type'] == 'decomposition': |
52 if input_json['component_type'] == 'decomposition': | |
53 algorithm = input_json['matrix_decomposition_selector']['select_algorithm'] | 54 algorithm = input_json['matrix_decomposition_selector']['select_algorithm'] |
54 obj = getattr(decomposition, algorithm)() | 55 obj = getattr(decomposition, algorithm)() |
55 options = input_json['matrix_decomposition_selector']['text_params'].strip() | 56 options = input_json['matrix_decomposition_selector']['text_params'].strip() |
56 if options != "": | 57 if options != "": |
57 options = ast.literal_eval('{' + options + '}') | 58 options = safe_eval('dict(' + options + ')') |
58 obj.set_params(**options) | 59 obj.set_params(**options) |
59 return obj | 60 elif input_json['component_type'] == 'kernel_approximation': |
60 if input_json['component_type'] == 'kernel_approximation': | |
61 algorithm = input_json['kernel_approximation_selector']['select_algorithm'] | 61 algorithm = input_json['kernel_approximation_selector']['select_algorithm'] |
62 obj = getattr(kernel_approximation, algorithm)() | 62 obj = getattr(kernel_approximation, algorithm)() |
63 options = input_json['kernel_approximation_selector']['text_params'].strip() | 63 options = input_json['kernel_approximation_selector']['text_params'].strip() |
64 if options != "": | 64 if options != "": |
65 options = ast.literal_eval('{' + options + '}') | 65 options = safe_eval('dict(' + options + ')') |
66 obj.set_params(**options) | 66 obj.set_params(**options) |
67 return obj | 67 elif input_json['component_type'] == 'FeatureAgglomeration': |
68 if input_json['component_type'] == 'FeatureAgglomeration': | |
69 algorithm = input_json['FeatureAgglomeration_selector']['select_algorithm'] | 68 algorithm = input_json['FeatureAgglomeration_selector']['select_algorithm'] |
70 obj = getattr(cluster, algorithm)() | 69 obj = getattr(cluster, algorithm)() |
71 options = input_json['FeatureAgglomeration_selector']['text_params'].strip() | 70 options = input_json['FeatureAgglomeration_selector']['text_params'].strip() |
72 if options != "": | 71 if options != "": |
73 options = ast.literal_eval('{' + options + '}') | 72 options = safe_eval('dict(' + options + ')') |
74 obj.set_params(**options) | 73 obj.set_params(**options) |
75 return obj | 74 elif input_json['component_type'] == 'skrebate': |
75 algorithm = input_json['skrebate_selector']['select_algorithm'] | |
76 if algorithm == 'TuRF': | |
77 obj = getattr(skrebate, algorithm)(core_algorithm='ReliefF') | |
78 else: | |
79 obj = getattr(skrebate, algorithm)() | |
80 options = input_json['skrebate_selector']['text_params'].strip() | |
81 if options != "": | |
82 options = safe_eval('dict(' + options + ')') | |
83 obj.set_params(**options) | |
84 if 'n_jobs' in obj.get_params(): | |
85 obj.set_params( n_jobs=N_JOBS ) | |
86 return obj | |
87 | |
76 if len(params['pipeline_component']) == 1: | 88 if len(params['pipeline_component']) == 1: |
77 step_obj = get_component( params['pipeline_component'][0]['component_selector']) | 89 step_obj = get_component( params['pipeline_component'][0]['component_selector']) |
78 if step_obj: | 90 if step_obj: |
79 pipeline_steps.append( ('preprocessing_1', step_obj) ) | 91 pipeline_steps.append( ('preprocessing_1', step_obj) ) |
80 else: | 92 else: |
99 </configfiles> | 111 </configfiles> |
100 <inputs> | 112 <inputs> |
101 <repeat name="pipeline_component" min="1" max="5" title="Pre-processing step"> | 113 <repeat name="pipeline_component" min="1" max="5" title="Pre-processing step"> |
102 <conditional name="component_selector"> | 114 <conditional name="component_selector"> |
103 <param name="component_type" type="select" label="Choose the type of transformation:"> | 115 <param name="component_type" type="select" label="Choose the type of transformation:"> |
104 <option value="none" selected="true">None</option> | 116 <option value="None" selected="true">None</option> |
105 <option value="pre_processor">Sklearn Preprocessor</option> | 117 <option value="pre_processor">Sklearn Preprocessor</option> |
106 <option value="feature_selection">Feature Selection</option> | 118 <option value="feature_selection">Feature Selection</option> |
107 <option value="decomposition">Matrix Decomposition</option> | 119 <option value="decomposition">Matrix Decomposition</option> |
108 <option value="kernel_approximation">Kernel Approximation</option> | 120 <option value="kernel_approximation">Kernel Approximation</option> |
109 <option value="FeatureAgglomeration">Agglomerate Features</option> | 121 <option value="FeatureAgglomeration">Agglomerate Features</option> |
122 <option value="skrebate">Skrebate algorithm</option> | |
110 </param> | 123 </param> |
111 <when value="none"/> | 124 <when value="None"/> |
112 <when value="pre_processor"> | 125 <when value="pre_processor"> |
113 <conditional name="pre_processors"> | 126 <conditional name="pre_processors"> |
114 <expand macro="sparse_preprocessors_ext" /> | 127 <expand macro="sparse_preprocessors_ext" /> |
115 <expand macro="sparse_preprocessor_options_ext" /> | 128 <expand macro="sparse_preprocessor_options_ext" /> |
116 </conditional> | 129 </conditional> |
126 <when value="kernel_approximation"> | 139 <when value="kernel_approximation"> |
127 <expand macro="kernel_approximation_all"/> | 140 <expand macro="kernel_approximation_all"/> |
128 </when> | 141 </when> |
129 <when value="FeatureAgglomeration"> | 142 <when value="FeatureAgglomeration"> |
130 <expand macro="FeatureAgglomeration"/> | 143 <expand macro="FeatureAgglomeration"/> |
144 </when> | |
145 <when value="skrebate"> | |
146 <expand macro="skrebate"/> | |
131 </when> | 147 </when> |
132 </conditional> | 148 </conditional> |
133 </repeat> | 149 </repeat> |
134 <section name="final_estimator" title="Final Estimator" expanded="true"> | 150 <section name="final_estimator" title="Final Estimator" expanded="true"> |
135 <expand macro="estimator_selector_all" /> | 151 <expand macro="estimator_selector_all" /> |
157 </conditional> | 173 </conditional> |
158 </conditional> | 174 </conditional> |
159 </repeat> | 175 </repeat> |
160 <param name="selected_module" value="svm"/> | 176 <param name="selected_module" value="svm"/> |
161 <param name="selected_estimator" value="SVR"/> | 177 <param name="selected_estimator" value="SVR"/> |
162 <param name="text_params" value="'kernel': 'linear'"/> | 178 <param name="text_params" value="kernel='linear'"/> |
163 <output name="outfile" file="pipeline01" compare="sim_size" delta="1"/> | 179 <output name="outfile" file="pipeline01" compare="sim_size" delta="1"/> |
164 </test> | 180 </test> |
165 <test> | 181 <test> |
166 <conditional name="component_selector"> | 182 <conditional name="component_selector"> |
167 <param name="component_type" value="pre_processor"/> | 183 <param name="component_type" value="pre_processor"/> |
207 <conditional name="component_selector"> | 223 <conditional name="component_selector"> |
208 <param name="component_type" value="None"/> | 224 <param name="component_type" value="None"/> |
209 </conditional> | 225 </conditional> |
210 <param name="selected_module" value="ensemble"/> | 226 <param name="selected_module" value="ensemble"/> |
211 <param name="selected_estimator" value="RandomForestRegressor"/> | 227 <param name="selected_estimator" value="RandomForestRegressor"/> |
212 <param name="text_params" value="'n_estimators': 100, 'random_state': 42"/> | 228 <param name="text_params" value="n_estimators=100, random_state=42"/> |
213 <output name="outfile" file="pipeline05" compare="sim_size" delta="1"/> | 229 <output name="outfile" file="pipeline05" compare="sim_size" delta="1"/> |
214 </test> | 230 </test> |
215 <test> | 231 <test> |
216 <conditional name="component_selector"> | 232 <conditional name="component_selector"> |
217 <param name="component_type" value="decomposition"/> | 233 <param name="component_type" value="decomposition"/> |
226 <test> | 242 <test> |
227 <conditional name="component_selector"> | 243 <conditional name="component_selector"> |
228 <param name="component_type" value="kernel_approximation"/> | 244 <param name="component_type" value="kernel_approximation"/> |
229 <conditional name="kernel_approximation_selector"> | 245 <conditional name="kernel_approximation_selector"> |
230 <param name="select_algorithm" value="RBFSampler"/> | 246 <param name="select_algorithm" value="RBFSampler"/> |
231 <param name="text_params" value="'n_components': 10, 'gamma': 2.0"/> | 247 <param name="text_params" value="n_components=10, gamma=2.0"/> |
232 </conditional> | 248 </conditional> |
233 </conditional> | 249 </conditional> |
234 <param name="selected_module" value="ensemble"/> | 250 <param name="selected_module" value="ensemble"/> |
235 <param name="selected_estimator" value="AdaBoostClassifier"/> | 251 <param name="selected_estimator" value="AdaBoostClassifier"/> |
236 <output name="outfile" file="pipeline07" compare="sim_size" delta="1"/> | 252 <output name="outfile" file="pipeline07" compare="sim_size" delta="1"/> |
238 <test> | 254 <test> |
239 <conditional name="component_selector"> | 255 <conditional name="component_selector"> |
240 <param name="component_type" value="FeatureAgglomeration"/> | 256 <param name="component_type" value="FeatureAgglomeration"/> |
241 <conditional name="FeatureAgglomeration_selector"> | 257 <conditional name="FeatureAgglomeration_selector"> |
242 <param name="select_algorithm" value="FeatureAgglomeration"/> | 258 <param name="select_algorithm" value="FeatureAgglomeration"/> |
243 <param name="text_params" value="'n_clusters': 3, 'affinity': 'euclidean'"/> | 259 <param name="text_params" value="n_clusters=3, affinity='euclidean'"/> |
244 </conditional> | 260 </conditional> |
245 </conditional> | 261 </conditional> |
246 <param name="selected_module" value="ensemble"/> | 262 <param name="selected_module" value="ensemble"/> |
247 <param name="selected_estimator" value="AdaBoostClassifier"/> | 263 <param name="selected_estimator" value="AdaBoostClassifier"/> |
248 <output name="outfile" file="pipeline08" compare="sim_size" delta="1"/> | 264 <output name="outfile" file="pipeline08" compare="sim_size" delta="1"/> |
265 </test> | |
266 <test> | |
267 <conditional name="component_selector"> | |
268 <param name="component_type" value="skrebate"/> | |
269 <conditional name="skrebate_selector"> | |
270 <param name="select_algorithm" value="ReliefF"/> | |
271 <param name="text_params" value="n_features_to_select=3, n_neighbors=100"/> | |
272 </conditional> | |
273 </conditional> | |
274 <param name="selected_module" value="ensemble"/> | |
275 <param name="selected_estimator" value="RandomForestRegressor"/> | |
276 <output name="outfile" file="pipeline09" compare="sim_size" delta="1"/> | |
277 </test> | |
278 <test> | |
279 <conditional name="component_selector"> | |
280 <param name="component_type" value="skrebate"/> | |
281 <conditional name="skrebate_selector"> | |
282 <param name="select_algorithm" value="TuRF"/> | |
283 <param name="text_params" value=""/> | |
284 </conditional> | |
285 </conditional> | |
286 <param name="selected_module" value="ensemble"/> | |
287 <param name="selected_estimator" value="RandomForestRegressor"/> | |
288 <output name="outfile" file="pipeline10" compare="sim_size" delta="1"/> | |
249 </test> | 289 </test> |
250 </tests> | 290 </tests> |
251 <help> | 291 <help> |
252 <![CDATA[ | 292 <![CDATA[ |
253 **What it does** | 293 **What it does** |
254 Constructs a pipeline that contains a list of transforms and a final estimator. Pipeline assembles several steps | 294 Constructs a pipeline that contains a list of transforms and a final estimator. Pipeline assembles several steps |
255 that can be cross-validated together while setting different parameters. | 295 that can be cross-validated together while setting different parameters. |
256 For more information, please refer to `Scikit-learn pipeline Pipeline`_. | 296 For more information, please refer to `Scikit-learn pipeline Pipeline`_. |
257 | 297 |
258 **Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_ and/or `cluster.FeatureAgglomeration`_. | 298 **Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_ and/or `skrebate`_. |
259 | 299 |
260 **Estimator** selector supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_ and `neighbors`_. | 300 **Estimator** selector supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_ and `neighbors`_. |
261 | 301 |
262 | 302 |
263 .. _`Scikit-learn pipeline Pipeline`: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html | 303 .. _`Scikit-learn pipeline Pipeline`: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html |
272 .. _`sklearn.preprocessing`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing | 312 .. _`sklearn.preprocessing`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing |
273 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection | 313 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection |
274 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition | 314 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition |
275 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation | 315 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation |
276 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html | 316 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html |
317 .. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/ | |
277 | 318 |
278 ]]> | 319 ]]> |
279 </help> | 320 </help> |
280 <expand macro="sklearn_citation"/> | 321 <expand macro="sklearn_citation"/> |
281 </tool> | 322 </tool> |