Mercurial > repos > bgruening > sklearn_build_pipeline

diff pipeline.xml @ 3:0857964e07c2 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit d00173591e4a783a4c1cb2664e4bb192ab5414f7
author: bgruening
date: Fri, 17 Aug 2018 12:26:40 -0400
parents: 73535ddcfa69
children: 86450dde8682
--- a/pipeline.xml	Tue Aug 07 05:45:45 2018 -0400
+++ b/pipeline.xml	Fri Aug 17 12:26:40 2018 -0400
@@ -3,7 +3,9 @@
     <macros>
         <import>main_macros.xml</import>
     </macros>
-    <expand macro="python_requirements"/>
+    <expand macro="python_requirements">
+        <requirement type="package" version="0.6">skrebate</requirement>
+    </expand>
     <expand macro="macro_stdio"/>
     <version_command>echo "@VERSION@"</version_command>
     <command>
@@ -16,18 +18,18 @@
         <configfile name="sklearn_pipeline_script">
             <![CDATA[
 import sys
+import os
 import json
 import pickle
 import pprint
-import xgboost
-import ast
-import sklearn.feature_selection
-from sklearn import ( preprocessing, svm, linear_model, ensemble, naive_bayes,
+import skrebate
+from sklearn import (preprocessing, svm, linear_model, ensemble, naive_bayes,
                     tree, neighbors, decomposition, kernel_approximation, cluster)
 from sklearn.pipeline import Pipeline
 
-@GET_ESTIMATOR_FUNCTION@
-@FEATURE_SELECTOR_FUNCTION@
+execfile("$__tool_directory__/utils.py")
+
+safe_eval = SafeEval()
 
 input_json_path = sys.argv[1]
 with open(input_json_path, "r") as param_handler:
@@ -45,34 +47,44 @@
         preprocessor = input_json["pre_processors"]["selected_pre_processor"]
         pre_processor_options = input_json["pre_processors"]["options"]
         my_class = getattr(preprocessing, preprocessor)
-        return my_class(**pre_processor_options)
-    if input_json['component_type'] == 'feature_selection':
-        fs_obj = feature_selector(input_json['fs_algorithm_selector'])
-        return fs_obj
-    if input_json['component_type'] == 'decomposition':
+        obj = my_class(**pre_processor_options)
+    elif input_json['component_type'] == 'feature_selection':
+        obj = feature_selector(input_json['fs_algorithm_selector'])
+    elif input_json['component_type'] == 'decomposition':
         algorithm = input_json['matrix_decomposition_selector']['select_algorithm']
         obj = getattr(decomposition, algorithm)()
         options = input_json['matrix_decomposition_selector']['text_params'].strip()
         if options != "":
-            options = ast.literal_eval('{' + options + '}')
+            options = safe_eval('dict(' + options + ')')
             obj.set_params(**options)
-        return obj
-    if input_json['component_type'] == 'kernel_approximation':
+    elif input_json['component_type'] == 'kernel_approximation':
         algorithm = input_json['kernel_approximation_selector']['select_algorithm']
         obj = getattr(kernel_approximation, algorithm)()
         options = input_json['kernel_approximation_selector']['text_params'].strip()
         if options != "":
-            options = ast.literal_eval('{' + options + '}')
+            options = safe_eval('dict(' + options + ')')
             obj.set_params(**options)
-        return obj
-    if input_json['component_type'] == 'FeatureAgglomeration':
+    elif input_json['component_type'] == 'FeatureAgglomeration':
         algorithm = input_json['FeatureAgglomeration_selector']['select_algorithm']
         obj = getattr(cluster, algorithm)()
         options = input_json['FeatureAgglomeration_selector']['text_params'].strip()
         if options != "":
-            options = ast.literal_eval('{' + options + '}')
+            options = safe_eval('dict(' + options + ')')
             obj.set_params(**options)
-        return obj
+    elif input_json['component_type'] == 'skrebate':
+        algorithm = input_json['skrebate_selector']['select_algorithm']
+        if algorithm == 'TuRF':
+            obj = getattr(skrebate, algorithm)(core_algorithm='ReliefF')
+        else:
+            obj = getattr(skrebate, algorithm)()
+        options = input_json['skrebate_selector']['text_params'].strip()
+        if options != "":
+            options = safe_eval('dict(' + options + ')')
+            obj.set_params(**options)
+    if 'n_jobs' in obj.get_params():
+        obj.set_params( n_jobs=N_JOBS )
+    return obj
+
 if len(params['pipeline_component']) == 1:
     step_obj = get_component( params['pipeline_component'][0]['component_selector'])
     if step_obj:
@@ -101,14 +113,15 @@
         <repeat name="pipeline_component" min="1" max="5" title="Pre-processing step">
             <conditional name="component_selector">
                 <param name="component_type" type="select" label="Choose the type of transformation:">
-                    <option value="none" selected="true">None</option>
+                    <option value="None" selected="true">None</option>
                     <option value="pre_processor">Sklearn Preprocessor</option>
                     <option value="feature_selection">Feature Selection</option>
                     <option value="decomposition">Matrix Decomposition</option>
                     <option value="kernel_approximation">Kernel Approximation</option>
                     <option value="FeatureAgglomeration">Agglomerate Features</option>
+                    <option value="skrebate">Skrebate algorithm</option>
                 </param>
-                <when value="none"/>
+                <when value="None"/>
                 <when value="pre_processor">
                     <conditional name="pre_processors">
                         <expand macro="sparse_preprocessors_ext" />
@@ -129,6 +142,9 @@
                 <when value="FeatureAgglomeration">
                     <expand macro="FeatureAgglomeration"/>
                 </when>
+                <when value="skrebate">
+                    <expand macro="skrebate"/>
+                </when>
             </conditional>
         </repeat>
         <section name="final_estimator" title="Final Estimator" expanded="true">
@@ -159,7 +175,7 @@
             </repeat>
             <param name="selected_module" value="svm"/>
             <param name="selected_estimator" value="SVR"/>
-            <param name="text_params" value="'kernel': 'linear'"/>
+            <param name="text_params" value="kernel='linear'"/>
             <output name="outfile" file="pipeline01" compare="sim_size" delta="1"/>
         </test>
         <test>
@@ -209,7 +225,7 @@
             </conditional>
             <param name="selected_module" value="ensemble"/>
             <param name="selected_estimator" value="RandomForestRegressor"/>
-            <param name="text_params" value="'n_estimators': 100, 'random_state': 42"/>
+            <param name="text_params" value="n_estimators=100, random_state=42"/>
             <output name="outfile" file="pipeline05" compare="sim_size" delta="1"/>
         </test>
         <test>
@@ -228,7 +244,7 @@
                 <param name="component_type" value="kernel_approximation"/>
                     <conditional name="kernel_approximation_selector">
                         <param name="select_algorithm" value="RBFSampler"/>
-                        <param name="text_params" value="'n_components': 10, 'gamma': 2.0"/>
+                        <param name="text_params" value="n_components=10, gamma=2.0"/>
                     </conditional>
             </conditional>
             <param name="selected_module" value="ensemble"/>
@@ -240,13 +256,37 @@
                 <param name="component_type" value="FeatureAgglomeration"/>
                     <conditional name="FeatureAgglomeration_selector">
                         <param name="select_algorithm" value="FeatureAgglomeration"/>
-                        <param name="text_params" value="'n_clusters': 3, 'affinity': 'euclidean'"/>
+                        <param name="text_params" value="n_clusters=3, affinity='euclidean'"/>
                     </conditional>
             </conditional>
             <param name="selected_module" value="ensemble"/>
             <param name="selected_estimator" value="AdaBoostClassifier"/>
             <output name="outfile" file="pipeline08" compare="sim_size" delta="1"/>
         </test>
+        <test>
+            <conditional name="component_selector">
+                <param name="component_type" value="skrebate"/>
+                    <conditional name="skrebate_selector">
+                        <param name="select_algorithm" value="ReliefF"/>
+                        <param name="text_params" value="n_features_to_select=3, n_neighbors=100"/>
+                    </conditional>
+            </conditional>
+            <param name="selected_module" value="ensemble"/>
+            <param name="selected_estimator" value="RandomForestRegressor"/>
+            <output name="outfile" file="pipeline09" compare="sim_size" delta="1"/>
+        </test>
+        <test>
+            <conditional name="component_selector">
+                <param name="component_type" value="skrebate"/>
+                    <conditional name="skrebate_selector">
+                        <param name="select_algorithm" value="TuRF"/>
+                        <param name="text_params" value=""/>
+                    </conditional>
+            </conditional>
+            <param name="selected_module" value="ensemble"/>
+            <param name="selected_estimator" value="RandomForestRegressor"/>
+            <output name="outfile" file="pipeline10" compare="sim_size" delta="1"/>
+        </test>
     </tests>
     <help>
         <![CDATA[
@@ -255,7 +295,7 @@
 that can be cross-validated together while setting different parameters.
 Please refer to `Scikit-learn pipeline Pipeline`_.
 
-**Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_ and/or `cluster.FeatureAgglomeration`_.
+**Pre-processing components** allow None, one or a combination of up to 5 transformations from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_ and/or `skrebate`_.
 
 **Estimator** selector supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_ and `neighbors`_.
 
@@ -274,6 +314,7 @@
 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition
 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation
 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html
+.. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/
 
         ]]>
     </help>
author	bgruening
date	Fri, 17 Aug 2018 12:26:40 -0400
parents	73535ddcfa69
children	86450dde8682