Mercurial > repos > bgruening > sklearn_sample_generator

diff main_macros.xml @ 24:97b467e06354 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author: bgruening
date: Tue, 14 May 2019 18:07:39 -0400
parents: 4ba68dd788b3
children: 86a086d2bbed
--- a/main_macros.xml	Sun Dec 30 01:52:56 2018 -0500
+++ b/main_macros.xml	Tue May 14 18:07:39 2019 -0400
@@ -1,14 +1,17 @@
 <macros>
-  <token name="@VERSION@">1.0</token>
+  <token name="@VERSION@">1.0.0.4</token>
 
   <xml name="python_requirements">
       <requirements>
           <requirement type="package" version="3.6">python</requirement>
-          <requirement type="package" version="0.20.2">scikit-learn</requirement>
-          <requirement type="package" version="0.23.4">pandas</requirement>
+          <requirement type="package" version="0.20.3">scikit-learn</requirement>
+          <requirement type="package" version="0.24.2">pandas</requirement>
           <requirement type="package" version="0.80">xgboost</requirement>
           <requirement type="package" version="0.9.13">asteval</requirement>
-          <yield />
+          <requirement type="package" version="0.6">skrebate</requirement>
+          <requirement type="package" version="0.4.2">imbalanced-learn</requirement>
+          <requirement type="package" version="0.16.0">mlxtend</requirement>
+          <yield/>
       </requirements>
   </xml>
 
@@ -352,10 +355,10 @@
       <option value="all_columns">All columns</option>
     </param>
     <when value="by_index_number">
-      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
+      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):"/>
     </when>
     <when value="all_but_by_index_number">
-      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
+      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):"/>
     </when>
     <when value="by_header_name">
       <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/>
@@ -428,7 +431,7 @@
           <option value="sparse">sparse matrix</option>
       </param>
       <when value="tabular">
-          <expand macro="samples_tabular" multiple1="true"/>
+          <expand macro="samples_tabular" multiple1="true" multiple2="false"/>
       </when>
       <when value="sparse">
           <expand macro="sparse_target"/>
@@ -823,6 +826,8 @@
     <option value="StratifiedShuffleSplit">StratifiedShuffleSplit</option>
     <option value="TimeSeriesSplit">TimeSeriesSplit</option>
     <option value="PredefinedSplit">PredefinedSplit</option>
+    <option value="OrderedKFold">OrderedKFold</option>
+    <option value="RepeatedOrderedKFold">RepeatedOrderedKFold</option>
     <yield/>
   </xml>
 
@@ -872,6 +877,16 @@
     <when value="PredefinedSplit">
       <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to '-1'."/>
     </when>
+    <when value="OrderedKFold"> <!-- options are supplied by the cv_n_splits, cv_shuffle and random_state macros -->
+      <expand macro="cv_n_splits"/>
+      <expand macro="cv_shuffle"/>
+      <expand macro="random_state"/>
+    </when>
+    <when value="RepeatedOrderedKFold"> <!-- no cv_shuffle here; the repetition count is set by n_repeats below -->
+      <expand macro="cv_n_splits"/>
+      <param argument="n_repeats" type="integer" value="5" min="1" label="n_repeats" help="Number of times the cross-validator is repeated. Must be at least 1."/>
+      <expand macro="random_state"/>
+    </when>
     <yield/>
   </xml>
 
@@ -929,7 +944,13 @@
   </xml>
 
   <xml name="cv_groups" >
-    <param argument="groups" type="text" value="" area="true" label="Groups" help="Group lables in a list. e.g., [1, 1, 2, 2, 3, 3, 3]"/>
+    <section name="groups_selector" title="Groups column selector" expanded="true"> <!-- group labels are picked as a single column (col_name=col_g, multiple=False) of the tabular dataset infile_g; header_g records whether that dataset has a header row -->
+      <param name="infile_g" type="data" format="tabular" label="Choose dataset containing groups info:"/>
+      <param name="header_g" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
+      <conditional name="column_selector_options_g">
+        <expand macro="samples_column_selector_options" column_option="selected_column_selector_option_g" col_name="col_g" multiple="False" infile="infile_g"/>
+      </conditional>
+    </section>
   </xml>
 
   <xml name="feature_selection_algorithms">
@@ -943,6 +964,7 @@
     <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
     <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
     <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
+    <yield/>
   </xml>
 
   <xml name="feature_selection_algorithm_details">
@@ -991,7 +1013,7 @@
     </when>
     <when value="VarianceThreshold">
       <section name="options" title="Options" expanded="False">
-        <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
+        <param argument="threshold" type="float" value="0.0" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
       </section>
     </when>
   </xml>
@@ -1047,13 +1069,47 @@
     </when>
   </xml>
 
-  <xml name="feature_selection_RFECV">
+  <xml name="feature_selection_RFECV_fs"> <!-- RFECV branch expanded by feature_selection_fs; uses the full cv macro, whereas feature_selection_RFECV_pipeline uses cv_reduced -->
+    <when value="RFECV">
+      <yield/> <!-- caller-supplied content (feature_selection_fs passes an estimator selector) is placed before the options section -->
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
+        <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/>
+        <expand macro="cv"/>
+        <expand macro="scoring_selection"/>
+        <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+      </section>
+    </when>
+  </xml>
+
+  <xml name="feature_selection_RFECV_pipeline">
     <when value="RFECV">
       <yield/>
       <section name="options" title="Advanced Options" expanded="False">
         <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
         <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/>
         <expand macro="cv_reduced"/>
+        <!-- TODO: group splitter support-->
+        <expand macro="scoring_selection"/>
+        <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+      </section>
+    </when>
+  </xml>
+
+  <xml name="feature_selection_DyRFECV_fs">
+    <when value="DyRFECV">
+      <yield/>
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="step" type="text" size="30" value="1" label="step" optional="true" help="Default = 1. Support float, int and list." >
+          <sanitizer>
+            <valid initial="default">
+              <add value="["/>
+              <add value="]"/>
+            </valid>
+          </sanitizer>
+        </param>
+        <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/>
+        <expand macro="cv"/>
         <expand macro="scoring_selection"/>
         <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
       </section>
@@ -1061,7 +1117,7 @@
   </xml>
 
   <xml name="feature_selection_pipeline">
-    <!--compare to `feature_selection_fs`, no fitted estimator for SelectFromModel and no customer estimator for RFE and RFECV-->
+    <!--compare to `feature_selection_fs`, no fitted estimator for SelectFromModel and no custom estimator for RFE and RFECV-->
     <conditional name="fs_algorithm_selector">
       <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
         <expand macro="feature_selection_algorithms"/>
@@ -1071,23 +1127,29 @@
       <expand macro="feature_selection_RFE">
         <expand macro="estimator_selector_all"/>
       </expand>  
-      <expand macro="feature_selection_RFECV">
+      <expand macro="feature_selection_RFECV_pipeline">
         <expand macro="estimator_selector_all"/>
       </expand>
+      <!-- TODO: add DyRFECV to pipeline-->
     </conditional>
   </xml>
 
   <xml name="feature_selection_fs">
     <conditional name="fs_algorithm_selector">
       <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
-        <expand macro="feature_selection_algorithms"/>
+        <expand macro="feature_selection_algorithms">
+          <option value="DyRFECV">DyRFECV - Extended RFECV with changeable steps</option>
+        </expand>
       </param>
       <expand macro="feature_selection_algorithm_details"/>
       <expand macro="feature_selection_SelectFromModel"/>
       <expand macro="feature_selection_RFE">
         <expand macro="estimator_selector_fs"/>
       </expand>  
-      <expand macro="feature_selection_RFECV">
+      <expand macro="feature_selection_RFECV_fs">
+        <expand macro="estimator_selector_fs"/>
+      </expand>
+      <expand macro="feature_selection_DyRFECV_fs">
         <expand macro="estimator_selector_fs"/>
       </expand>
     </conditional>
@@ -1105,7 +1167,7 @@
 
   <xml name="model_validation_common_options">
     <expand macro="cv"/>
-    <expand macro="verbose"/>
+    <!-- <expand macro="verbose"/> -->
     <yield/>
   </xml>
 
@@ -1139,6 +1201,8 @@
         <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option>
         <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option>
         <option value="r2">Regression -- 'r2'</option>
+        <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option>
+        <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option>
       </param>
       <when value="default"/>
       <when value="accuracy"><expand macro="secondary_scoring_selection_classification"/></when>
@@ -1167,6 +1231,8 @@
       <when value="neg_mean_squared_log_error"><expand macro="secondary_scoring_selection_regression"/></when>
       <when value="neg_median_absolute_error"><expand macro="secondary_scoring_selection_regression"/></when>
       <when value="r2"><expand macro="secondary_scoring_selection_regression"/></when>
+      <when value="binarize_auc_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when>
+      <when value="binarize_average_precision_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when>
     </conditional>
   </xml>
 
@@ -1206,63 +1272,48 @@
     </param>
   </xml>
 
+  <xml name="secondary_scoring_selection_anormaly"> <!-- NOTE(review): "anormaly" is a misspelling of "anomaly"; the binarize_auc_scorer and binarize_average_precision_scorer when-branches of the scoring conditional expand this macro by name, so any rename must change those references too -->
+    <param name="secondary_scoring" type="select" multiple="true" label="Additional scoring used in multi-metric mode:" help="If the same metric with the primary is chosen, the metric will be ignored.">
+      <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option>
+      <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option>
+    </param>
+  </xml>
+
   <xml name="pre_dispatch" token_type="hidden" token_default_value="all" token_help="Number of predispatched jobs for parallel execution">
     <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/>
   </xml>
 
   <xml name="search_cv_estimator">
-    <param name="infile_pipeline" type="data" format="zip" label="Choose the dataset containing pipeline object:"/>
+    <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/>
     <section name="search_params_builder" title="Search parameters Builder" expanded="true">
-      <repeat name="param_set" min="1" max="20" title="Parameter setting for search:">
-        <conditional name="search_param_selector">
-          <param name="selected_param_type" type="select" label="Choose the transformation the parameter belongs to">
-            <option value="final_estimator_p" selected="true">Final estimator</option>
-            <option value="prep_1_p">Pre-processing step #1</option>
-            <option value="prep_2_p">Pre-processing step #2</option>
-            <option value="prep_3_p">Pre-processing step #3</option>
-            <option value="prep_4_p">Pre-processing step #4</option>
-            <option value="prep_5_p">Pre-processing step #5</option>
+      <param name="infile_params" type="data" format="tabular" label="Choose the dataset containing parameter names"/>
+      <repeat name="param_set" min="1" max="30" title="Parameter settings for search:">
+          <param name="sp_name" type="select" label="Choose a parameter name (with current value)">
+            <options from_dataset="infile_params" startswith="@">
+              <column name="name" index="2"/>
+              <column name="value" index="1"/>
+              <filter type="unique_value" name="unique_param" column="1"/>
+              <filter type="sort_by" name="sorted_param" column="2"/>
+            </options>
           </param>
-          <when value="final_estimator_p">
-            <expand macro="search_param_input" />
-          </when>
-          <when value="prep_1_p">
-            <expand macro="search_param_input" label="Pre_processing component #1  parameter:" help="One parameter per box. For example: with_centering: [True, False]."/>
-          </when>
-          <when value="prep_2_p">
-            <expand macro="search_param_input" label="Pre_processing component #2 parameter:" help="One parameter per box. For example: k: [3, 5, 7, 9]. See bottom for more examples"/>
-          </when>
-          <when value="prep_3_p">
-            <expand macro="search_param_input" label="Pre_processing component #3 parameter:" help="One parameter per box. For example: n_components: [1, 10, 100, 1000]. See bottom for more examples"/>
-          </when>
-          <when value="prep_4_p">
-            <expand macro="search_param_input" label="Pre_processing component #4 parameter:" help="One parameter per box. For example: n_components: [1, 10, 100, 1000]. See bottom for more examples"/>
-          </when>
-          <when value="prep_5_p">
-            <expand macro="search_param_input" label="Pre_processing component #5 parameter:" help="One parameter per box. For example: affinity: ['euclidean', 'l1', 'l2', 'manhattan']. See bottom for more examples"/>
-          </when>
-        </conditional>
+          <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples">
+            <sanitizer>
+              <valid initial="default">
+                <add value="&apos;"/>
+                <add value="&quot;"/>
+                <add value="["/>
+                <add value="]"/>
+              </valid>
+            </sanitizer>
+          </param>
       </repeat>
     </section>
   </xml>
 
-  <xml name="search_param_input" token_label="Estimator parameter:" token_help="One parameter per box. For example: C: [1, 10, 100, 1000]. See bottom for more examples">
-    <param name="search_p" type="text" value="" optional="true" label="@LABEL@" help="@HELP@">
-      <sanitizer>
-        <valid initial="default">
-          <add value="&apos;"/>
-          <add value="&quot;"/>
-          <add value="["/>
-          <add value="]"/>
-        </valid>
-      </sanitizer>
-    </param>
-  </xml>
-
   <xml name="search_cv_options">
       <expand macro="scoring_selection"/>
       <expand macro="model_validation_common_options"/>
-      <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/>
+      <!-- <expand macro="pre_dispatch" default_value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/> -->
       <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/>
       <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/>
       <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/>
@@ -1403,12 +1454,12 @@
     <conditional name="estimator_selector">
       <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
         <expand macro="estimator_module_options">
-            <option value="customer_estimator">Load a customer estimator</option>
+            <option value="custom_estimator">Load a custom estimator</option>
         </expand>
       </param>
       <expand macro="estimator_suboptions">
-        <when value="customer_estimator">
-            <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the customer estimator or pipeline:"/>
+        <when value="custom_estimator">
+            <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline:"/>
         </when>
       </expand>
     </conditional>
@@ -1591,6 +1642,7 @@
         <option value="over_sampling.SMOTENC">over_sampling.SMOTENC</option>
         <option value="combine.SMOTEENN">combine.SMOTEENN</option>
         <option value="combine.SMOTETomek">combine.SMOTETomek</option>
+        <option value="Z_RandomOverSampler">Z_RandomOverSampler - for regression</option>
       </param>
       <when value="under_sampling.ClusterCentroids">
         <expand macro="estimator_params_text"
@@ -1668,6 +1720,33 @@
         <expand macro="estimator_params_text"
               help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, tomek=None."/>
       </when>
+      <when value="Z_RandomOverSampler">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, negative_thres=0, positive_thres=-1."/>
+      </when>
+    </conditional>
+  </xml>
+
+  <xml name="stacking_ensemble_inputs"> <!-- collapsed "Advanced Options" section: caller-yielded params first, then two boolean flags -->
+    <section name="options" title="Advanced Options" expanded="false">
+        <yield/>
+        <param argument="use_features_in_secondary" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="use_features_in_secondary" help="If true, the meta-estimator is trained on both the base estimators' predictions and the original features; if false, only on the predictions."/>
+        <param argument="store_train_meta_features" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="store_train_meta_features" help="If true, the meta-features computed from the training data are stored on the fitted model (train_meta_features_)."/>
+    </section>
+  </xml>
+
+  <xml name="stacking_base_estimator"> <!-- NOTE(review): this appears to repeat the estimator_selector conditional used elsewhere in this file (same custom_estimator option and c_estimator input); consider expanding that macro here instead of duplicating it -->
+    <conditional name="estimator_selector">
+        <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
+            <expand macro="estimator_module_options">
+                <option value="custom_estimator">Load a custom estimator</option>
+            </expand>
+        </param>
+        <expand macro="estimator_suboptions">
+            <when value="custom_estimator">
+                <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline"/>
+            </when>
+        </expand>
     </conditional>
   </xml>
author	bgruening
date	Tue, 14 May 2019 18:07:39 -0400
parents	4ba68dd788b3
children	86a086d2bbed