Mercurial > repos > bgruening > sklearn_sample_generator
diff main_macros.xml @ 24:97b467e06354 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author | bgruening |
---|---|
date | Tue, 14 May 2019 18:07:39 -0400 |
parents | 4ba68dd788b3 |
children | 86a086d2bbed |
line wrap: on
line diff
--- a/main_macros.xml Sun Dec 30 01:52:56 2018 -0500 +++ b/main_macros.xml Tue May 14 18:07:39 2019 -0400 @@ -1,14 +1,17 @@ <macros> - <token name="@VERSION@">1.0</token> + <token name="@VERSION@">1.0.0.4</token> <xml name="python_requirements"> <requirements> <requirement type="package" version="3.6">python</requirement> - <requirement type="package" version="0.20.2">scikit-learn</requirement> - <requirement type="package" version="0.23.4">pandas</requirement> + <requirement type="package" version="0.20.3">scikit-learn</requirement> + <requirement type="package" version="0.24.2">pandas</requirement> <requirement type="package" version="0.80">xgboost</requirement> <requirement type="package" version="0.9.13">asteval</requirement> - <yield /> + <requirement type="package" version="0.6">skrebate</requirement> + <requirement type="package" version="0.4.2">imbalanced-learn</requirement> + <requirement type="package" version="0.16.0">mlxtend</requirement> + <yield/> </requirements> </xml> @@ -352,10 +355,10 @@ <option value="all_columns">All columns</option> </param> <when value="by_index_number"> - <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> + <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):"/> </when> <when value="all_but_by_index_number"> - <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> + <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):"/> </when> <when value="by_header_name"> <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. 
For example: target1,target2"/> @@ -428,7 +431,7 @@ <option value="sparse">sparse matrix</option> </param> <when value="tabular"> - <expand macro="samples_tabular" multiple1="true"/> + <expand macro="samples_tabular" multiple1="true" multiple2="false"/> </when> <when value="sparse"> <expand macro="sparse_target"/> @@ -823,6 +826,8 @@ <option value="StratifiedShuffleSplit">StratifiedShuffleSplit</option> <option value="TimeSeriesSplit">TimeSeriesSplit</option> <option value="PredefinedSplit">PredefinedSplit</option> + <option value="OrderedKFold">OrderedKFold</option> + <option value="RepeatedOrderedKFold">RepeatedOrderedKFold</option> <yield/> </xml> @@ -872,6 +877,16 @@ <when value="PredefinedSplit"> <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to '-1'."/> </when> + <when value="OrderedKFold"> + <expand macro="cv_n_splits"/> + <expand macro="cv_shuffle"/> + <expand macro="random_state"/> + </when> + <when value="RepeatedOrderedKFold"> + <expand macro="cv_n_splits"/> + <param argument="n_repeats" type="integer" value="5"/> + <expand macro="random_state"/> + </when> <yield/> </xml> @@ -929,7 +944,13 @@ </xml> <xml name="cv_groups" > - <param argument="groups" type="text" value="" area="true" label="Groups" help="Group lables in a list. 
e.g., [1, 1, 2, 2, 3, 3, 3]"/> + <section name="groups_selector" title="Groups column selector" expanded="true"> + <param name="infile_g" type="data" format="tabular" label="Choose dataset containing groups info:"/> + <param name="header_g" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> + <conditional name="column_selector_options_g"> + <expand macro="samples_column_selector_options" column_option="selected_column_selector_option_g" col_name="col_g" multiple="False" infile="infile_g"/> + </conditional> + </section> </xml> <xml name="feature_selection_algorithms"> @@ -943,6 +964,7 @@ <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> <option value="RFE">RFE - Feature ranking with recursive feature elimination</option> <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option> + <yield/> </xml> <xml name="feature_selection_algorithm_details"> @@ -991,7 +1013,7 @@ </when> <when value="VarianceThreshold"> <section name="options" title="Options" expanded="False"> - <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/> + <param argument="threshold" type="float" value="0.0" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/> </section> </when> </xml> @@ -1047,13 +1069,47 @@ </when> </xml> - <xml name="feature_selection_RFECV"> + <xml name="feature_selection_RFECV_fs"> + <when value="RFECV"> + <yield/> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. 
" /> + <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/> + <expand macro="cv"/> + <expand macro="scoring_selection"/> + <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> + </section> + </when> + </xml> + + <xml name="feature_selection_RFECV_pipeline"> <when value="RFECV"> <yield/> <section name="options" title="Advanced Options" expanded="False"> <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/> <expand macro="cv_reduced"/> + <!-- TODO: group splitter support--> + <expand macro="scoring_selection"/> + <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> + </section> + </when> + </xml> + + <xml name="feature_selection_DyRFECV_fs"> + <when value="DyRFECV"> + <yield/> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="step" type="text" size="30" value="1" label="step" optional="true" help="Default = 1. Support float, int and list." > + <sanitizer> + <valid initial="default"> + <add value="["/> + <add value="]"/> + </valid> + </sanitizer> + </param> + <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/> + <expand macro="cv"/> <expand macro="scoring_selection"/> <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." 
/> </section> @@ -1061,7 +1117,7 @@ </xml> <xml name="feature_selection_pipeline"> - <!--compare to `feature_selection_fs`, no fitted estimator for SelectFromModel and no customer estimator for RFE and RFECV--> + <!--compare to `feature_selection_fs`, no fitted estimator for SelectFromModel and no custom estimator for RFE and RFECV--> <conditional name="fs_algorithm_selector"> <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> <expand macro="feature_selection_algorithms"/> @@ -1071,23 +1127,29 @@ <expand macro="feature_selection_RFE"> <expand macro="estimator_selector_all"/> </expand> - <expand macro="feature_selection_RFECV"> + <expand macro="feature_selection_RFECV_pipeline"> <expand macro="estimator_selector_all"/> </expand> + <!-- TODO: add DyRFECV to pipeline--> </conditional> </xml> <xml name="feature_selection_fs"> <conditional name="fs_algorithm_selector"> <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> - <expand macro="feature_selection_algorithms"/> + <expand macro="feature_selection_algorithms"> + <option value="DyRFECV">DyRFECV - Extended RFECV with changeable steps</option> + </expand> </param> <expand macro="feature_selection_algorithm_details"/> <expand macro="feature_selection_SelectFromModel"/> <expand macro="feature_selection_RFE"> <expand macro="estimator_selector_fs"/> </expand> - <expand macro="feature_selection_RFECV"> + <expand macro="feature_selection_RFECV_fs"> + <expand macro="estimator_selector_fs"/> + </expand> + <expand macro="feature_selection_DyRFECV_fs"> <expand macro="estimator_selector_fs"/> </expand> </conditional> @@ -1105,7 +1167,7 @@ <xml name="model_validation_common_options"> <expand macro="cv"/> - <expand macro="verbose"/> + <!-- expand macro="verbose"/> --> <yield/> </xml> @@ -1139,6 +1201,8 @@ <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option> <option 
value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option> <option value="r2">Regression -- 'r2'</option> + <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option> + <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option> </param> <when value="default"/> <when value="accuracy"><expand macro="secondary_scoring_selection_classification"/></when> @@ -1167,6 +1231,8 @@ <when value="neg_mean_squared_log_error"><expand macro="secondary_scoring_selection_regression"/></when> <when value="neg_median_absolute_error"><expand macro="secondary_scoring_selection_regression"/></when> <when value="r2"><expand macro="secondary_scoring_selection_regression"/></when> + <when value="binarize_auc_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when> + <when value="binarize_average_precision_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when> </conditional> </xml> @@ -1206,63 +1272,48 @@ </param> </xml> + <xml name="secondary_scoring_selection_anormaly"> + <param name="secondary_scoring" type="select" multiple="true" label="Additional scoring used in multi-metric mode:" help="If the same metric with the primary is chosen, the metric will be ignored."> + <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option> + <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option> + </param> + </xml> + <xml name="pre_dispatch" token_type="hidden" token_default_value="all" token_help="Number of predispatched jobs for parallel execution"> <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/> </xml> <xml name="search_cv_estimator"> - <param name="infile_pipeline" type="data" format="zip" label="Choose the dataset containing pipeline object:"/> + <param name="infile_estimator" type="data" format="zip" 
label="Choose the dataset containing pipeline/estimator object"/> <section name="search_params_builder" title="Search parameters Builder" expanded="true"> - <repeat name="param_set" min="1" max="20" title="Parameter setting for search:"> - <conditional name="search_param_selector"> - <param name="selected_param_type" type="select" label="Choose the transformation the parameter belongs to"> - <option value="final_estimator_p" selected="true">Final estimator</option> - <option value="prep_1_p">Pre-processing step #1</option> - <option value="prep_2_p">Pre-processing step #2</option> - <option value="prep_3_p">Pre-processing step #3</option> - <option value="prep_4_p">Pre-processing step #4</option> - <option value="prep_5_p">Pre-processing step #5</option> + <param name="infile_params" type="data" format="tabular" label="Choose the dataset containing parameter names"/> + <repeat name="param_set" min="1" max="30" title="Parameter settings for search:"> + <param name="sp_name" type="select" label="Choose a parameter name (with current value)"> + <options from_dataset="infile_params" startswith="@"> + <column name="name" index="2"/> + <column name="value" index="1"/> + <filter type="unique_value" name="unique_param" column="1"/> + <filter type="sort_by" name="sorted_param" column="2"/> + </options> </param> - <when value="final_estimator_p"> - <expand macro="search_param_input" /> - </when> - <when value="prep_1_p"> - <expand macro="search_param_input" label="Pre_processing component #1 parameter:" help="One parameter per box. For example: with_centering: [True, False]."/> - </when> - <when value="prep_2_p"> - <expand macro="search_param_input" label="Pre_processing component #2 parameter:" help="One parameter per box. For example: k: [3, 5, 7, 9]. See bottom for more examples"/> - </when> - <when value="prep_3_p"> - <expand macro="search_param_input" label="Pre_processing component #3 parameter:" help="One parameter per box. 
For example: n_components: [1, 10, 100, 1000]. See bottom for more examples"/> - </when> - <when value="prep_4_p"> - <expand macro="search_param_input" label="Pre_processing component #4 parameter:" help="One parameter per box. For example: n_components: [1, 10, 100, 1000]. See bottom for more examples"/> - </when> - <when value="prep_5_p"> - <expand macro="search_param_input" label="Pre_processing component #5 parameter:" help="One parameter per box. For example: affinity: ['euclidean', 'l1', 'l2', 'manhattan']. See bottom for more examples"/> - </when> - </conditional> + <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples"> + <sanitizer> + <valid initial="default"> + <add value="'"/> + <add value="&quot;"/> + <add value="["/> + <add value="]"/> + </valid> + </sanitizer> + </param> </repeat> </section> </xml> - <xml name="search_param_input" token_label="Estimator parameter:" token_help="One parameter per box. For example: C: [1, 10, 100, 1000]. 
See bottom for more examples"> - <param name="search_p" type="text" value="" optional="true" label="@LABEL@" help="@HELP@"> - <sanitizer> - <valid initial="default"> - <add value="'"/> - <add value="&quot;"/> - <add value="["/> - <add value="]"/> - </valid> - </sanitizer> - </param> - </xml> - <xml name="search_cv_options"> <expand macro="scoring_selection"/> <expand macro="model_validation_common_options"/> - <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/> + <!--expand macro="pre_dispatch" default_value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/--> <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/> <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/> <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/> @@ -1403,12 +1454,12 @@ <conditional name="estimator_selector"> <param name="selected_module" type="select" label="Choose the module that contains target estimator:" > <expand macro="estimator_module_options"> - <option value="customer_estimator">Load a customer estimator</option> + <option value="custom_estimator">Load a custom estimator</option> </expand> </param> <expand macro="estimator_suboptions"> - <when value="customer_estimator"> - <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the customer estimator or pipeline:"/> + <when value="custom_estimator"> + <param name="c_estimator" type="data" format="zip" label="Choose the dataset 
containing the custom estimator or pipeline:"/> </when> </expand> </conditional> @@ -1591,6 +1642,7 @@ <option value="over_sampling.SMOTENC">over_sampling.SMOTENC</option> <option value="combine.SMOTEENN">combine.SMOTEENN</option> <option value="combine.SMOTETomek">combine.SMOTETomek</option> + <option value="Z_RandomOverSampler">Z_RandomOverSampler - for regression</option> </param> <when value="under_sampling.ClusterCentroids"> <expand macro="estimator_params_text" @@ -1668,6 +1720,33 @@ <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, tomek=None."/> </when> + <when value="Z_RandomOverSampler"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, negative_thres=0, positive_thres=-1."/> + </when> + </conditional> + </xml> + + <xml name="stacking_ensemble_inputs"> + <section name="options" title="Advanced Options" expanded="false"> + <yield/> + <param argument="use_features_in_secondary" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> + <param argument="store_train_meta_features" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> + </section> + </xml> + + <xml name="stacking_base_estimator"> + <conditional name="estimator_selector"> + <param name="selected_module" type="select" label="Choose the module that contains target estimator:" > + <expand macro="estimator_module_options"> + <option value="custom_estimator">Load a custom estimator</option> + </expand> + </param> + <expand macro="estimator_suboptions"> + <when value="custom_estimator"> + <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline"/> + </when> + </expand> </conditional> </xml>