Mercurial > repos > goeckslab > tabular_learner

--- a/base_model_trainer.py	Sat Nov 08 14:20:19 2025 +0000
+++ b/base_model_trainer.py	Wed Nov 26 17:49:36 2025 +0000
@@ -175,6 +175,7 @@
             "feature_interaction",
             "feature_ratio",
             "fix_imbalance",
+            "n_jobs",
         ]:
             val = getattr(self, attr, None)
             if val is not None:
--- a/pycaret_macros.xml	Sat Nov 08 14:20:19 2025 +0000
+++ b/pycaret_macros.xml	Wed Nov 26 17:49:36 2025 +0000
@@ -1,7 +1,7 @@
 <macros>
-    <token name="@TABULAR_LEARNER_VERSION@">0.1.1</token>
+    <token name="@TABULAR_LEARNER_VERSION@">0.1.2</token>
     <token name="@PYCARET_VERSION@">3.3.2</token>
-    <token name="@SUFFIX@">1</token>
+    <token name="@SUFFIX@">2</token>
     <token name="@PYCARET_PREDICT_VERSION@">@PYCARET_VERSION@+@SUFFIX@</token>
     <token name="@PROFILE@">21.05</token>
     <xml name="python_requirements">
--- a/pycaret_train.py	Sat Nov 08 14:20:19 2025 +0000
+++ b/pycaret_train.py	Wed Nov 26 17:49:36 2025 +0000
@@ -1,5 +1,6 @@
 import argparse
 import logging
+import os

 from pycaret_classification import ClassificationModelTrainer
 from pycaret_regression import RegressionModelTrainer
@@ -115,6 +116,13 @@
         help="Random seed for PyCaret setup",
     )
     parser.add_argument(
+        "--n-jobs",
+        dest="n_jobs",
+        type=int,
+        default=None,
+        help="Number of parallel jobs; defaults to GALAXY_SLOTS or 1 if unset/invalid.",
+    )
+    parser.add_argument(
         "--probability_threshold",
         type=float,
         default=None,
@@ -129,6 +137,16 @@

     args = parser.parse_args()

+    # Derive n_jobs from CLI or GALAXY_SLOTS env var
+    if args.n_jobs is not None:
+        n_jobs = args.n_jobs
+    else:
+        slots_str = os.environ.get("GALAXY_SLOTS")
+        try:
+            n_jobs = int(slots_str) if slots_str is not None else 1
+        except ValueError:
+            n_jobs = 1
+
     # Normalize cross-validation flags: --no_cross_validation overrides --cross_validation
     if args.no_cross_validation:
         args.cross_validation = False
@@ -149,6 +167,7 @@
         "feature_ratio": args.feature_ratio,
         "fix_imbalance": args.fix_imbalance,
         "tune_model": args.tune_model,
+        "n_jobs": n_jobs,
         "probability_threshold": args.probability_threshold,
         "best_model_metric": args.best_model_metric,
     }
--- a/tabular_learner.xml	Sat Nov 08 14:20:19 2025 +0000
+++ b/tabular_learner.xml	Wed Nov 26 17:49:36 2025 +0000
@@ -6,7 +6,7 @@
     <expand macro="python_requirements" />
     <command>
         <![CDATA[
-        python $__tool_directory__/pycaret_train.py --input_file '$input_file' --target_col '$target_feature' --output_dir '.' --random_seed '$random_seed'
+        python $__tool_directory__/pycaret_train.py --input_file '$input_file' --target_col '$target_feature' --output_dir '.' --random_seed '$random_seed' --n-jobs \${GALAXY_SLOTS:-1}
         #if $model_type == "classification"
             #if $classification_models
                 --models '$classification_models'
@@ -199,18 +199,18 @@
         <test>
             <param name="input_file" value="pcr.tsv"/>
             <param name="target_feature" value="11"/>
-            <param name="model_type" value="classification"/>
-            <param name="best_model_metric" value="F1"/>
+            <param name="model_selection|model_type" value="classification"/>
+            <param name="model_selection|best_model_metric" value="F1"/>
             <param name="random_seed" value="42"/>
-            <param name="customize_defaults" value="true"/>
-            <param name="train_size" value="0.8"/>
-            <param name="normalize" value="true"/>
-            <param name="feature_selection" value="true"/>
-            <param name="enable_cross_validation" value="true"/>
-            <param name="cross_validation_folds" value="5"/>
-            <param name="remove_outliers" value="true"/>
-            <param name="remove_multicollinearity" value="true"/>
-            <param name="probability_threshold" value="0.4" />
+            <param name="advanced_settings|customize_defaults" value="true"/>
+            <param name="advanced_settings|train_size" value="0.8"/>
+            <param name="advanced_settings|normalize" value="true"/>
+            <param name="advanced_settings|feature_selection" value="true"/>
+            <param name="advanced_settings|cross_validation|enable_cross_validation" value="true"/>
+            <param name="advanced_settings|cross_validation|cross_validation_folds" value="5"/>
+            <param name="advanced_settings|remove_outliers" value="true"/>
+            <param name="advanced_settings|remove_multicollinearity" value="true"/>
+            <param name="advanced_settings|probability_threshold" value="0.4" />
             <output name="model" file="expected_model_classification_customized.h5" compare="sim_size"/>
             <output name="comparison_result">
                 <assert_contents>
@@ -226,16 +226,16 @@
         <test>
             <param name="input_file" value="pcr.tsv"/>
             <param name="target_feature" value="11"/>
-            <param name="model_type" value="classification"/>
+            <param name="model_selection|model_type" value="classification"/>
             <param name="random_seed" value="42"/>
-            <param name="customize_defaults" value="true"/>
-            <param name="train_size" value="0.8"/>
-            <param name="normalize" value="true"/>
-            <param name="feature_selection" value="true"/>
-            <param name="enable_cross_validation" value="false"/>
-            <param name="remove_outliers" value="true"/>
-            <param name="remove_multicollinearity" value="true"/>
-            <param name="probability_threshold" value="0.6" />
+            <param name="advanced_settings|customize_defaults" value="true"/>
+            <param name="advanced_settings|train_size" value="0.8"/>
+            <param name="advanced_settings|normalize" value="true"/>
+            <param name="advanced_settings|feature_selection" value="true"/>
+            <param name="advanced_settings|cross_validation|enable_cross_validation" value="false"/>
+            <param name="advanced_settings|remove_outliers" value="true"/>
+            <param name="advanced_settings|remove_multicollinearity" value="true"/>
+            <param name="advanced_settings|probability_threshold" value="0.6" />
             <output name="model" file="expected_model_classification_customized_cross_off.h5" compare="sim_size"/>
             <output name="comparison_result">
                 <assert_contents>
@@ -249,7 +249,7 @@
         <test>
             <param name="input_file" value="pcr.tsv"/>
             <param name="target_feature" value="11"/>
-            <param name="model_type" value="classification"/>
+            <param name="model_selection|model_type" value="classification"/>
             <param name="random_seed" value="42"/>
             <param name="tune_model" value="true"/>
             <output name="model" file="expected_model_classification.h5" compare="sim_size"/>
@@ -260,12 +260,18 @@
                     <has_text text="Feature Importance" />
                 </assert_contents>
             </output>
-            <output name="best_model_csv" value="expected_best_model_classification.csv" />
+            <output name="best_model_csv">
+                <assert_contents>
+                    <has_text text="n_estimators,100" />
+                    <has_text text="num_leaves,31" />
+                    <has_text text="learning_rate,0.1" />
+                </assert_contents>
+            </output>
         </test>
         <test>
             <param name="input_file" value="pcr.tsv"/>
             <param name="target_feature" value="11"/>
-            <param name="model_type" value="classification"/>
+            <param name="model_selection|model_type" value="classification"/>
             <param name="random_seed" value="42"/>
             <output name="model" file="expected_model_classification.h5" compare="sim_size"/>
             <output name="comparison_result">
@@ -275,13 +281,19 @@
                     <has_text text="Feature Importance" />
                 </assert_contents>
             </output>
-            <output name="best_model_csv" value="expected_best_model_classification.csv" />
+            <output name="best_model_csv">
+                <assert_contents>
+                    <has_text text="n_estimators,100" />
+                    <has_text text="num_leaves,31" />
+                    <has_text text="learning_rate,0.1" />
+                </assert_contents>
+            </output>
         </test>
         <test>
             <param name="input_file" value="auto-mpg.tsv"/>
             <param name="target_feature" value="1"/>
-            <param name="model_type" value="regression"/>
-            <param name="best_model_metric" value="RMSE"/>
+            <param name="model_selection|model_type" value="regression"/>
+            <param name="model_selection|best_model_metric" value="RMSE"/>
             <param name="random_seed" value="42"/>
             <output name="model" file="expected_model_regression.h5" compare="sim_size" />
             <output name="comparison_result">
--- a/test-data/expected_best_model_classification.csv	Sat Nov 08 14:20:19 2025 +0000
+++ b/test-data/expected_best_model_classification.csv	Wed Nov 26 17:49:36 2025 +0000
@@ -9,7 +9,7 @@
 min_child_weight,0.001
 min_split_gain,0.0
 n_estimators,100
-n_jobs,-1
+n_jobs,1
 num_leaves,31
 objective,
 random_state,42
Binary file test-data/expected_model_classification.h5 has changed