sklearn_build_pipeline: pipeline.xml comparison

comparison pipeline.xml @ 3:0857964e07c2 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7

author	bgruening
date	Fri, 17 Aug 2018 12:26:40 -0400
parents	73535ddcfa69
children	86450dde8682

comparison

equal deleted inserted replaced

-:9ddacd0b8c8b
+:0857964e07c2
 <tool id="sklearn_build_pipeline" name="Pipeline Builder" version="@VERSION@">
 <description>constructs a list of transforms and a final estimator</description>
 <macros>
 <import>main_macros.xml</import>
 </macros>
-<expand macro="python_requirements"/>
+<expand macro="python_requirements">
+<requirement type="package" version="0.6">skrebate</requirement>
+</expand>
 <expand macro="macro_stdio"/>
 <version_command>echo "@VERSION@"</version_command>
 <command>
 <![CDATA[
 python "$sklearn_pipeline_script" '$inputs'
 <configfiles>
 <inputs name="inputs" />
 <configfile name="sklearn_pipeline_script">
 <![CDATA[
 import sys
+import os
 import json
 import pickle
 import pprint
-import xgboost
+import skrebate
-import ast
+from sklearn import (preprocessing, svm, linear_model, ensemble, naive_bayes,
-import sklearn.feature_selection
-from sklearn import ( preprocessing, svm, linear_model, ensemble, naive_bayes,
 tree, neighbors, decomposition, kernel_approximation, cluster)
 from sklearn.pipeline import Pipeline
-@GET_ESTIMATOR_FUNCTION@
+execfile("$__tool_directory__/utils.py")
-@FEATURE_SELECTOR_FUNCTION@
+safe_eval = SafeEval()
 input_json_path = sys.argv[1]
 with open(input_json_path, "r") as param_handler:
 params = json.load(param_handler)
 sys.exit("The pre-processing component type can't be None when the number of components is greater than 1.")
 if input_json['component_type'] == 'pre_processor':
 preprocessor = input_json["pre_processors"]["selected_pre_processor"]
 pre_processor_options = input_json["pre_processors"]["options"]
 my_class = getattr(preprocessing, preprocessor)
-return my_class(**pre_processor_options)
+obj = my_class(**pre_processor_options)
-if input_json['component_type'] == 'feature_selection':
+elif input_json['component_type'] == 'feature_selection':
-fs_obj = feature_selector(input_json['fs_algorithm_selector'])
+obj = feature_selector(input_json['fs_algorithm_selector'])
-return fs_obj
+elif input_json['component_type'] == 'decomposition':
-if input_json['component_type'] == 'decomposition':
 algorithm = input_json['matrix_decomposition_selector']['select_algorithm']
 obj = getattr(decomposition, algorithm)()
 options = input_json['matrix_decomposition_selector']['text_params'].strip()
 if options != "":
-options = ast.literal_eval('{' + options + '}')
+options = safe_eval('dict(' + options + ')')
 obj.set_params(**options)
-return obj
+elif input_json['component_type'] == 'kernel_approximation':
-if input_json['component_type'] == 'kernel_approximation':
 algorithm = input_json['kernel_approximation_selector']['select_algorithm']
 obj = getattr(kernel_approximation, algorithm)()
 options = input_json['kernel_approximation_selector']['text_params'].strip()
 if options != "":
-options = ast.literal_eval('{' + options + '}')
+options = safe_eval('dict(' + options + ')')
 obj.set_params(**options)
-return obj
+elif input_json['component_type'] == 'FeatureAgglomeration':
-if input_json['component_type'] == 'FeatureAgglomeration':
 algorithm = input_json['FeatureAgglomeration_selector']['select_algorithm']
 obj = getattr(cluster, algorithm)()
 options = input_json['FeatureAgglomeration_selector']['text_params'].strip()
 if options != "":
-options = ast.literal_eval('{' + options + '}')
+options = safe_eval('dict(' + options + ')')
 obj.set_params(**options)
-return obj
+elif input_json['component_type'] == 'skrebate':
+algorithm = input_json['skrebate_selector']['select_algorithm']
+if algorithm == 'TuRF':
+obj = getattr(skrebate, algorithm)(core_algorithm='ReliefF')
+else:
+obj = getattr(skrebate, algorithm)()
+options = input_json['skrebate_selector']['text_params'].strip()
+if options != "":
+options = safe_eval('dict(' + options + ')')
+obj.set_params(**options)
+if 'n_jobs' in obj.get_params():
+obj.set_params( n_jobs=N_JOBS )
+return obj
 if len(params['pipeline_component']) == 1:
 step_obj = get_component( params['pipeline_component'][0]['component_selector'])
 if step_obj:
 pipeline_steps.append( ('preprocessing_1', step_obj) )
 else:
 </configfiles>
 <inputs>
 <repeat name="pipeline_component" min="1" max="5" title="Pre-processing step">
 <conditional name="component_selector">
 <param name="component_type" type="select" label="Choose the type of transformation:">
-<option value="none" selected="true">None</option>
+<option value="None" selected="true">None</option>
 <option value="pre_processor">Sklearn Preprocessor</option>
 <option value="feature_selection">Feature Selection</option>
 <option value="decomposition">Matrix Decomposition</option>
 <option value="kernel_approximation">Kernel Approximation</option>
 <option value="FeatureAgglomeration">Agglomerate Features</option>
+<option value="skrebate">Skrebate algorithm</option>
 </param>
-<when value="none"/>
+<when value="None"/>
 <when value="pre_processor">
 <conditional name="pre_processors">
 <expand macro="sparse_preprocessors_ext" />
 <expand macro="sparse_preprocessor_options_ext" />
 </conditional>
 <when value="kernel_approximation">
 <expand macro="kernel_approximation_all"/>
 </when>
 <when value="FeatureAgglomeration">
 <expand macro="FeatureAgglomeration"/>
+</when>
+<when value="skrebate">
+<expand macro="skrebate"/>
 </when>
 </conditional>
 </repeat>
 <section name="final_estimator" title="Final Estimator" expanded="true">
 <expand macro="estimator_selector_all" />
 </conditional>
 </conditional>
 </repeat>
 <param name="selected_module" value="svm"/>
 <param name="selected_estimator" value="SVR"/>
-<param name="text_params" value="'kernel': 'linear'"/>
+<param name="text_params" value="kernel='linear'"/>
 <output name="outfile" file="pipeline01" compare="sim_size" delta="1"/>
 </test>
 <test>
 <conditional name="component_selector">
 <param name="component_type" value="pre_processor"/>
 <conditional name="component_selector">
 <param name="component_type" value="None"/>
 </conditional>
 <param name="selected_module" value="ensemble"/>
 <param name="selected_estimator" value="RandomForestRegressor"/>
-<param name="text_params" value="'n_estimators': 100, 'random_state': 42"/>
+<param name="text_params" value="n_estimators=100, random_state=42"/>
 <output name="outfile" file="pipeline05" compare="sim_size" delta="1"/>
 </test>
 <test>
 <conditional name="component_selector">
 <param name="component_type" value="decomposition"/>
 <test>
 <conditional name="component_selector">
 <param name="component_type" value="kernel_approximation"/>
 <conditional name="kernel_approximation_selector">
 <param name="select_algorithm" value="RBFSampler"/>
-<param name="text_params" value="'n_components': 10, 'gamma': 2.0"/>
+<param name="text_params" value="n_components=10, gamma=2.0"/>
 </conditional>
 </conditional>
 <param name="selected_module" value="ensemble"/>
 <param name="selected_estimator" value="AdaBoostClassifier"/>
 <output name="outfile" file="pipeline07" compare="sim_size" delta="1"/>
 <test>
 <conditional name="component_selector">
 <param name="component_type" value="FeatureAgglomeration"/>
 <conditional name="FeatureAgglomeration_selector">
 <param name="select_algorithm" value="FeatureAgglomeration"/>
-<param name="text_params" value="'n_clusters': 3, 'affinity': 'euclidean'"/>
+<param name="text_params" value="n_clusters=3, affinity='euclidean'"/>
 </conditional>
 </conditional>
 <param name="selected_module" value="ensemble"/>
 <param name="selected_estimator" value="AdaBoostClassifier"/>
 <output name="outfile" file="pipeline08" compare="sim_size" delta="1"/>
+</test>
+<test>
+<conditional name="component_selector">
+<param name="component_type" value="skrebate"/>
+<conditional name="skrebate_selector">
+<param name="select_algorithm" value="ReliefF"/>
+<param name="text_params" value="n_features_to_select=3, n_neighbors=100"/>
+</conditional>
+</conditional>
+<param name="selected_module" value="ensemble"/>
+<param name="selected_estimator" value="RandomForestRegressor"/>
+<output name="outfile" file="pipeline09" compare="sim_size" delta="1"/>
+</test>
+<test>
+<conditional name="component_selector">
+<param name="component_type" value="skrebate"/>
+<conditional name="skrebate_selector">
+<param name="select_algorithm" value="TuRF"/>
+<param name="text_params" value=""/>
+</conditional>
+</conditional>
+<param name="selected_module" value="ensemble"/>
+<param name="selected_estimator" value="RandomForestRegressor"/>
+<output name="outfile" file="pipeline10" compare="sim_size" delta="1"/>
 </test>
 </tests>
 <help>
 <![CDATA[
 **What it does**
 Constructs a pipeline that contains a list of transforms and a final estimator. Pipeline assembles several steps
 that can be cross-validated together while setting different parameters.
 Please refer to `Scikit-learn pipeline Pipeline`_.
-**Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_ and/or `cluster.FeatureAgglomeration`_.
+**Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_ and/or `skrebate`_.
 **Estimator** selector supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_ and `neighbors`_.
 .. _`Scikit-learn pipeline Pipeline`: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
 .. _`sklearn.preprocessing`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing
 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection
 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition
 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation
 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html
+.. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/
 ]]>
 </help>
 <expand macro="sklearn_citation"/>
 </tool>

Mercurial > repos > bgruening > sklearn_build_pipeline

comparison pipeline.xml @ 3:0857964e07c2 draft