Mercurial > repos > bgruening > sklearn_searchcv
comparison search_model_validation.xml @ 2:907bb0418c9f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit b1e5fa3170484d2cc3396f2abe99bb8cfcfa9c65
author | bgruening |
---|---|
date | Tue, 07 Aug 2018 05:45:28 -0400 |
parents | 91bf3f0d7455 |
children | f9fea8323bcb |
comparison
equal
deleted
inserted
replaced
1:f727f5ff7d60 | 2:907bb0418c9f |
---|---|
18 <configfile name="sklearn_search_model_validation_script"> | 18 <configfile name="sklearn_search_model_validation_script"> |
19 <![CDATA[ | 19 <![CDATA[ |
20 import sys | 20 import sys |
21 import json | 21 import json |
22 import pandas | 22 import pandas |
23 import re | |
23 import pickle | 24 import pickle |
24 import numpy as np | 25 import numpy as np |
25 import xgboost | 26 import xgboost |
26 import scipy | 27 import scipy |
27 from asteval import Interpreter, make_symbol_table | 28 from asteval import Interpreter, make_symbol_table |
28 from sklearn import metrics, preprocessing, model_selection, ensemble | 29 from sklearn import metrics, preprocessing, model_selection, ensemble |
29 from sklearn.pipeline import Pipeline | 30 from sklearn.pipeline import Pipeline |
30 | 31 |
31 @COLUMNS_FUNCTION@ | 32 @COLUMNS_FUNCTION@ |
32 @GET_ESTIMATOR_FUNCTION@ | 33 @GET_ESTIMATOR_FUNCTION@ |
34 @SAFE_EVAL_FUNCTION@ | |
33 @GET_SEARCH_PARAMS_FUNCTION@ | 35 @GET_SEARCH_PARAMS_FUNCTION@ |
36 @GET_CV_FUNCTION@ | |
34 | 37 |
35 input_json_path = sys.argv[1] | 38 input_json_path = sys.argv[1] |
36 with open(input_json_path, "r") as param_handler: | 39 with open(input_json_path, "r") as param_handler: |
37 params = json.load(param_handler) | 40 params = json.load(param_handler) |
38 | 41 |
40 infile1 = "$input_options.infile1" | 43 infile1 = "$input_options.infile1" |
41 infile2 = "$input_options.infile2" | 44 infile2 = "$input_options.infile2" |
42 infile_pipeline = "$search_schemes.infile_pipeline" | 45 infile_pipeline = "$search_schemes.infile_pipeline" |
43 outfile_result = "$outfile_result" | 46 outfile_result = "$outfile_result" |
44 outfile_estimator = "$outfile_estimator" | 47 outfile_estimator = "$outfile_estimator" |
45 #if $search_schemes.selected_search_scheme == "RandomizedSearchCV": | |
46 np.random.seed($search_schemes.random_seed) | |
47 #end if | |
48 | 48 |
49 params_builder = params['search_schemes']['search_params_builder'] | 49 params_builder = params['search_schemes']['search_params_builder'] |
50 | 50 |
51 input_type = params["input_options"]["selected_input"] | 51 input_type = params["input_options"]["selected_input"] |
52 if input_type=="tabular": | 52 if input_type=="tabular": |
85 | 85 |
86 optimizers = params["search_schemes"]["selected_search_scheme"] | 86 optimizers = params["search_schemes"]["selected_search_scheme"] |
87 optimizers = getattr(model_selection, optimizers) | 87 optimizers = getattr(model_selection, optimizers) |
88 | 88 |
89 options = params["search_schemes"]["options"] | 89 options = params["search_schemes"]["options"] |
90 options['cv'] = get_cv( options['cv'].strip() ) | |
90 if 'scoring' in options and options['scoring'] == '': | 91 if 'scoring' in options and options['scoring'] == '': |
91 options['scoring'] = None | 92 options['scoring'] = None |
92 if 'pre_dispatch' in options and options['pre_dispatch'] == '': | 93 if 'pre_dispatch' in options and options['pre_dispatch'] == '': |
93 options['pre_dispatch'] = None | 94 options['pre_dispatch'] = None |
94 | 95 |
121 <section name="options" title="Advanced Options for SearchCV" expanded="false"> | 122 <section name="options" title="Advanced Options for SearchCV" expanded="false"> |
122 <expand macro="search_cv_options"/> | 123 <expand macro="search_cv_options"/> |
123 </section> | 124 </section> |
124 </when> | 125 </when> |
125 <when value="RandomizedSearchCV"> | 126 <when value="RandomizedSearchCV"> |
126 <param name="random_seed" type="integer" value="65535" min="0" max="65535" label="Set up random seed:"/> | |
127 <expand macro="search_cv_estimator"/> | 127 <expand macro="search_cv_estimator"/> |
128 <section name="options" title="Advanced Options for SearchCV" expanded="false"> | 128 <section name="options" title="Advanced Options for SearchCV" expanded="false"> |
129 <expand macro="search_cv_options"/> | 129 <expand macro="search_cv_options"/> |
130 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/> | 130 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/> |
131 <expand macro="random_state"/> | 131 <expand macro="random_state"/> |
345 <assert_contents> | 345 <assert_contents> |
346 <has_text_matching expression=".+0.08045977011494253[^/w]+10[^/w]" /> | 346 <has_text_matching expression=".+0.08045977011494253[^/w]+10[^/w]" /> |
347 </assert_contents> | 347 </assert_contents> |
348 </output> | 348 </output> |
349 </test> | 349 </test> |
350 <test> | |
351 <param name="selected_search_scheme" value="GridSearchCV"/> | |
352 <param name="infile_pipeline" value="pipeline01"/> | |
353 <conditional name="search_param_selector"> | |
354 <param name="search_p" value="C: [1, 10, 100, 1000]"/> | |
355 <param name="selected_param_type" value="final_estimator_p"/> | |
356 </conditional> | |
357 <param name='cv' value="StratifiedKFold(n_splits=3, shuffle=True, random_state=10)"/> | |
358 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
359 <param name="header1" value="true" /> | |
360 <param name="selected_column_selector_option" value="all_columns"/> | |
361 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
362 <param name="header2" value="true" /> | |
363 <param name="selected_column_selector_option2" value="all_columns"/> | |
364 <output name="outfile_estimator" file="searchCV01" compare="sim_size" delta="1"/> | |
365 </test> | |
350 </tests> | 366 </tests> |
351 <help> | 367 <help> |
352 <![CDATA[ | 368 <![CDATA[ |
353 **What it does** | 369 **What it does** |
354 Searches for the optimal parameter values of an estimator or pipeline using either an exhaustive grid search or a randomized search, both with cross-validation. | 370 Searches for the optimal parameter values of an estimator or pipeline using either an exhaustive grid search or a randomized search, both with cross-validation. |