comparison search_model_validation.xml @ 2:907bb0418c9f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit b1e5fa3170484d2cc3396f2abe99bb8cfcfa9c65
author bgruening
date Tue, 07 Aug 2018 05:45:28 -0400
parents 91bf3f0d7455
children f9fea8323bcb
comparison
equal deleted inserted replaced
1:f727f5ff7d60 2:907bb0418c9f
18 <configfile name="sklearn_search_model_validation_script"> 18 <configfile name="sklearn_search_model_validation_script">
19 <![CDATA[ 19 <![CDATA[
20 import sys 20 import sys
21 import json 21 import json
22 import pandas 22 import pandas
23 import re
23 import pickle 24 import pickle
24 import numpy as np 25 import numpy as np
25 import xgboost 26 import xgboost
26 import scipy 27 import scipy
27 from asteval import Interpreter, make_symbol_table 28 from asteval import Interpreter, make_symbol_table
28 from sklearn import metrics, preprocessing, model_selection, ensemble 29 from sklearn import metrics, preprocessing, model_selection, ensemble
29 from sklearn.pipeline import Pipeline 30 from sklearn.pipeline import Pipeline
30 31
31 @COLUMNS_FUNCTION@ 32 @COLUMNS_FUNCTION@
32 @GET_ESTIMATOR_FUNCTION@ 33 @GET_ESTIMATOR_FUNCTION@
34 @SAFE_EVAL_FUNCTION@
33 @GET_SEARCH_PARAMS_FUNCTION@ 35 @GET_SEARCH_PARAMS_FUNCTION@
36 @GET_CV_FUNCTION@
34 37
35 input_json_path = sys.argv[1] 38 input_json_path = sys.argv[1]
36 with open(input_json_path, "r") as param_handler: 39 with open(input_json_path, "r") as param_handler:
37 params = json.load(param_handler) 40 params = json.load(param_handler)
38 41
40 infile1 = "$input_options.infile1" 43 infile1 = "$input_options.infile1"
41 infile2 = "$input_options.infile2" 44 infile2 = "$input_options.infile2"
42 infile_pipeline = "$search_schemes.infile_pipeline" 45 infile_pipeline = "$search_schemes.infile_pipeline"
43 outfile_result = "$outfile_result" 46 outfile_result = "$outfile_result"
44 outfile_estimator = "$outfile_estimator" 47 outfile_estimator = "$outfile_estimator"
45 #if $search_schemes.selected_search_scheme == "RandomizedSearchCV":
46 np.random.seed($search_schemes.random_seed)
47 #end if
48 48
49 params_builder = params['search_schemes']['search_params_builder'] 49 params_builder = params['search_schemes']['search_params_builder']
50 50
51 input_type = params["input_options"]["selected_input"] 51 input_type = params["input_options"]["selected_input"]
52 if input_type=="tabular": 52 if input_type=="tabular":
85 85
86 optimizers = params["search_schemes"]["selected_search_scheme"] 86 optimizers = params["search_schemes"]["selected_search_scheme"]
87 optimizers = getattr(model_selection, optimizers) 87 optimizers = getattr(model_selection, optimizers)
88 88
89 options = params["search_schemes"]["options"] 89 options = params["search_schemes"]["options"]
90 options['cv'] = get_cv( options['cv'].strip() )
90 if 'scoring' in options and options['scoring'] == '': 91 if 'scoring' in options and options['scoring'] == '':
91 options['scoring'] = None 92 options['scoring'] = None
92 if 'pre_dispatch' in options and options['pre_dispatch'] == '': 93 if 'pre_dispatch' in options and options['pre_dispatch'] == '':
93 options['pre_dispatch'] = None 94 options['pre_dispatch'] = None
94 95
121 <section name="options" title="Advanced Options for SearchCV" expanded="false"> 122 <section name="options" title="Advanced Options for SearchCV" expanded="false">
122 <expand macro="search_cv_options"/> 123 <expand macro="search_cv_options"/>
123 </section> 124 </section>
124 </when> 125 </when>
125 <when value="RandomizedSearchCV"> 126 <when value="RandomizedSearchCV">
126 <param name="random_seed" type="integer" value="65535" min="0" max="65535" label="Set up random seed:"/>
127 <expand macro="search_cv_estimator"/> 127 <expand macro="search_cv_estimator"/>
128 <section name="options" title="Advanced Options for SearchCV" expanded="false"> 128 <section name="options" title="Advanced Options for SearchCV" expanded="false">
129 <expand macro="search_cv_options"/> 129 <expand macro="search_cv_options"/>
130 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/> 130 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/>
131 <expand macro="random_state"/> 131 <expand macro="random_state"/>
345 <assert_contents> 345 <assert_contents>
346 <has_text_matching expression=".+0.08045977011494253[^/w]+10[^/w]" /> 346 <has_text_matching expression=".+0.08045977011494253[^/w]+10[^/w]" />
347 </assert_contents> 347 </assert_contents>
348 </output> 348 </output>
349 </test> 349 </test>
350 <test>
351 <param name="selected_search_scheme" value="GridSearchCV"/>
352 <param name="infile_pipeline" value="pipeline01"/>
353 <conditional name="search_param_selector">
354 <param name="search_p" value="C: [1, 10, 100, 1000]"/>
355 <param name="selected_param_type" value="final_estimator_p"/>
356 </conditional>
357 <param name='cv' value="StratifiedKFold(n_splits=3, shuffle=True, random_state=10)"/>
358 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
359 <param name="header1" value="true" />
360 <param name="selected_column_selector_option" value="all_columns"/>
361 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
362 <param name="header2" value="true" />
363 <param name="selected_column_selector_option2" value="all_columns"/>
364 <output name="outfile_estimator" file="searchCV01" compare="sim_size" delta="1"/>
365 </test>
350 </tests> 366 </tests>
351 <help> 367 <help>
352 <![CDATA[ 368 <![CDATA[
353 **What it does** 369 **What it does**
354 Searches optimized parameter values for an estimator or pipeline through either exhaustive grid cross validation search or Randomized cross validation search. 370 Searches optimized parameter values for an estimator or pipeline through either exhaustive grid cross validation search or Randomized cross validation search.