Galaxy |

Changeset 8:fd7a054ffdbd (2018-07-13)

Previous changeset 7:57a7471292df (2018-07-10) Next changeset 9:c6b3efcba7bd (2018-08-04)

Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48

modified:
main_macros.xml
model_validation.xml

removed:
test-data/mv_result07.tabular

diff -r 57a7471292df -r fd7a054ffdbd main_macros.xml
--- a/main_macros.xml Tue Jul 10 03:13:16 2018 -0400
+++ b/main_macros.xml Fri Jul 13 03:56:45 2018 -0400

[

b'@@ -35,7 +35,8 @@\n if not options[\'threshold\'] or options[\'threshold\'] == \'None\':\n options[\'threshold\'] = None\n if \'extra_estimator\' in inputs and inputs[\'extra_estimator\'][\'has_estimator\'] == \'no_load\':\n- fitted_estimator = pickle.load(open("inputs[\'extra_estimator\'][\'fitted_estimator\']", \'r\'))\n+ with open("inputs[\'extra_estimator\'][\'fitted_estimator\']", \'rb\') as model_handler:\n+ fitted_estimator = pickle.load(model_handler)\n new_selector = selector(fitted_estimator, prefit=True, **options)\n else:\n estimator=inputs["estimator"]\n@@ -83,7 +84,7 @@\n parse_dates=True\n )\n else:\n- X = mmread(open(file1, \'r\'))\n+ X = mmread(file1)\n \n header = \'infer\' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None\n column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]\n@@ -432,19 +433,6 @@\n \n \n \n- <xml name="tabular_input">\n- <param name="infile" type="data" format="tabular" label="Data file with numeric values"/>\n- <param name="start_column" type="data_column" data_ref="infile" optional="True" label="Select a subset of data. 
Start column:" />\n- <param name="end_column" type="data_column" data_ref="infile" optional="True" label="End column:" />\n- </xml>\n-\n- <xml name="sample_cols" token_label1="File containing true class labels:" token_label2="File containing predicted class labels:" token_multiple1="False" token_multiple2="False" token_format1="tabular" token_format2="tabular" token_help1="" token_help2="">\n- <param name="infile1" type="data" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/>\n- <param name="col1" multiple="@MULTIPLE1@" type="data_column" data_ref="infile1" label="Select target column(s):"/>\n- <param name="infile2" type="data" format="@FORMAT2@" label="@LABEL2@" help="@HELP2@"/>\n- <param name="col2" multiple="@MULTIPLE2@" type="data_column" data_ref="infile2" label="Select target column(s):"/>\n- <yield/>\n- </xml>\n \n <xml name="samples_tabular" token_multiple1="false" token_multiple2="false">\n <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/>\n@@ -472,13 +460,13 @@\n <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>\n </when>\n <when value="by_header_name">\n- <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="String seperate by colon. For example: target1,target2"/>\n+ <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/>\n </when>\n <when value="all_but_by_index_number">\n <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>\n </when>\n <when value="all_but_by_header_name">\n- <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="String seperate by colon. For example: target1,target2"/>\n+ <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. 
For example: target1,target2"/>\n </when>\n <when value="all_columns">\n </when>\n@@ -553,11 +541,6 @@\n </conditional>\n </xml>\n \n- <xml name="multitype_input" token_format="tabular" token_help="All datasets with tabular format are supporetd.">\n- <param name="infile_transform" type="data" format="@FORMAT@" label="Select a dataset to transform:" help="@HELP@"/>\n- </xml>\n-\n-\n \n <xml name="nn_advanced_options">\n <section name="options" title="Advanced Options" expanded="False">\n@@ -822,9 +805,17 @@\n </param>\n </xml>\n \n+ <xml name="sparse_preprocessors_ext">\n+ <expand macro="sparse_preprocessors">\n+ <option value="KernelCenterer">Kernel Centerer (Centers a kernel '..b' Options" expanded="False">\n+ \n+ <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true"\n+ label="Use a copy of data for precomputing normalization" help=" "/>\n+ </section>\n+ </when>\n+ <when value="PolynomialFeatures">\n+ <section name="options" title="Advanced Options" expanded="False">\n+ <param argument="degree" type="integer" optional="true" value="2" label="The degree of the polynomial features " help=""/>\n+ <param argument="interaction_only" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Produce interaction features only" help="(Features that are products of at most degree distinct input features) "/>\n+ <param argument="include_bias" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true" label="Include a bias column" help="Feature in which all polynomial powers are zero "/>\n+ </section>\n+ </when>\n+ <when value="RobustScaler">\n+ <section name="options" title="Advanced Options" expanded="False">\n+ \n+ <param argument="with_centering" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true"\n+ label="Center the data before scaling" help=" "/>\n+ <param argument="with_scaling" type="boolean" 
optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true"\n+ label="Scale the data to interquartile range" help=" "/>\n+ <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true"\n+ label="Use a copy of data for inplace scaling" help=" "/>\n+ </section>\n+ </when>\n+ </expand>\n+ </xml>\n+\n <xml name="estimator_input_no_fit">\n <expand macro="feature_selection_estimator" />\n <conditional name="extra_estimator">\n@@ -892,6 +914,7 @@\n <expand macro="feature_selection_estimator_choices" />\n </conditional>\n </xml>\n+\n <xml name="feature_selection_all">\n <conditional name="feature_selection_algorithms">\n <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">\n@@ -1014,6 +1037,7 @@\n </when-->\n </conditional>\n </xml>\n+\n <xml name="feature_selection_score_function">\n <param argument="score_func" type="select" label="Select a score function">\n <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>\n@@ -1023,6 +1047,7 @@\n <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option>\n </param>\n </xml>\n+\n <xml name="feature_selection_estimator">\n <param argument="estimator" type="select" label="Select an estimator" help="The base estimator from which the transformer is built.">\n <option value="svm.SVR(kernel="linear")">svm.SVR(kernel="linear")</option>\n@@ -1032,6 +1057,7 @@\n <option value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)">ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)</option>\n </param>\n </xml>\n+\n <xml name="feature_selection_extra_estimator"> \n <param name="has_estimator" type="select" label="Does your estimator on the list above?">\n <option value="yes">Yes, my estimator is on the list</option>\n@@ -1039,6 +1065,7 @@\n <yield/>\n </param>\n </xml>\n+\n 
<xml name="feature_selection_estimator_choices">\n <when value="yes">\n </when>\n@@ -1047,6 +1074,7 @@\n </when>\n <yield/>\n </xml>\n+\n <xml name="feature_selection_methods">\n <conditional name="select_methods">\n <param name="selected_method" type="select" label="Select an operation">\n'

diff -r 57a7471292df -r fd7a054ffdbd model_validation.xml
--- a/model_validation.xml Tue Jul 10 03:13:16 2018 -0400
+++ b/model_validation.xml Fri Jul 13 03:56:45 2018 -0400

[

@@ -22,7 +22,7 @@
import pickle
import numpy as np
import sklearn.model_selection
-from sklearn import svm, linear_model, ensemble
+from sklearn import svm, linear_model, ensemble, preprocessing
from sklearn.pipeline import Pipeline

@COLUMNS_FUNCTION@
@@ -30,7 +30,8 @@
@FEATURE_SELECTOR_FUNCTION@

input_json_path = sys.argv[1]
-params = json.load(open(input_json_path, "r"))
+with open(input_json_path, "r") as param_handler:
+    params = json.load(param_handler)

input_type = params["input_options"]["selected_input"]
if input_type=="tabular":
@@ -49,7 +50,7 @@
             parse_dates=True
     )
else:
-    X = mmread(open("$input_options.infile1", 'r'))
+    X = mmread("$input_options.infile1")

header = 'infer' if params["input_options"]["header2"] else None
column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
@@ -75,10 +76,17 @@

pipeline_steps = []

+## Set up pre_processor and add to pipeline steps.
+if params['pre_processing']['do_pre_processing'] == 'Yes':
+    preprocessor = params["pre_processing"]["pre_processors"]["selected_pre_processor"]
+    pre_processor_options = params["pre_processing"]["pre_processors"]["options"]
+    my_class = getattr(preprocessing, preprocessor)
+    pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) )
+
## Set up feature selector and add to pipeline steps.
if params['feature_selection']['do_feature_selection'] == 'Yes':
     feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms'])
-    pipeline_steps.append( ('feature_selector', feature_selector))
+    pipeline_steps.append( ('feature_selector', feature_selector) )

## Set up estimator and add to pipeline.
estimator=params["model_validation_functions"]["estimator"]
@@ -138,6 +146,19 @@
         </configfile>
     </configfiles>
     <inputs>
+        <conditional name="pre_processing">
+            <param name="do_pre_processing" type="select" label="Do pre_processing?">
+                <option value="No" selected="true"/>
+                <option value="Yes"/>
+            </param>
+            <when value="No"/>
+            <when value="Yes">
+                <conditional name="pre_processors">
+                    <expand macro="sparse_preprocessors_ext" />
+                    <expand macro="sparse_preprocessor_options_ext" />
+                </conditional>
+            </when>
+        </conditional>
         <conditional name="feature_selection">
             <param name="do_feature_selection" type="select" label="Do feature selection?">
                 <option value="No" selected="true"/>
@@ -352,7 +373,54 @@
             <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
             <param name="header2" value="true" />
             <param name="selected_column_selector_option2" value="all_columns"/>
-            <output name="outfile" file="mv_result07.tabular"/>
+            <output name="outfile" >
+                <assert_contents>
+                    <has_line line="0.7824428015300172" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="do_pre_processing" value="Yes"/>
+            <param name="selected_pre_processor" value="RobustScaler"/>
+            <param name="do_feature_selection" value="Yes"/>
+            <param name="selected_algorithm" value="SelectKBest"/>
+            <param name="score_func" value="f_classif"/>
+            <param name="selected_function" value="GridSearchCV"/>
+            <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/>
+            <param name="has_estimator" value="yes"/>
+            <param name="param_grid" value="[{'feature_selector__k': [3, 5, 7, 9], 'estimator__C': [1, 10, 100, 1000]}]"/>
+            <param name="return_type" value="best_score_"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="selected_column_selector_option" value="all_columns"/>
+            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="header2" value="true" />
+            <param name="selected_column_selector_option2" value="all_columns"/>
+            <output name="outfile" >
+                <assert_contents>
+                    <has_line line="0.7938837807353147" />
+                </assert_contents>
+            </output>
+        </test>
+         <test>
+            <param name="do_pre_processing" value="Yes"/>
+            <param name="selected_pre_processor" value="RobustScaler"/>
+            <param name="selected_function" value="GridSearchCV"/>
+            <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/>
+            <param name="has_estimator" value="yes"/>
+            <param name="param_grid" value="[{'estimator__C': [1, 10, 100, 1000]}]"/>
+            <param name="return_type" value="best_score_"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="selected_column_selector_option" value="all_columns"/>
+            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="header2" value="true" />
+            <param name="selected_column_selector_option2" value="all_columns"/>
+            <output name="outfile" >
+                <assert_contents>
+                    <has_line line="0.7904476204861263" />
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help>

diff -r 57a7471292df -r fd7a054ffdbd test-data/mv_result07.tabular
--- a/test-data/mv_result07.tabular Tue Jul 10 03:13:16 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,1 +0,0 @@
-0.7824428015300172