tabular_learner: pycaret_regression.py comparison

comparison pycaret_regression.py @ 4:11fdac5affb3 draft

planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54

author	goeckslab
date	Fri, 25 Jul 2025 19:02:12 +0000
parents	209b663a4f62
children

comparison

Legend: equal | deleted | inserted | replaced

-:f6a65e05d6ec
+:11fdac5affb3
 import logging
 from base_model_trainer import BaseModelTrainer
 from dashboard import generate_regression_explainer_dashboard
 from pycaret.regression import RegressionExperiment
-from utils import add_hr_to_html, add_plot_to_html
 LOG = logging.getLogger(__name__)
 class RegressionModelTrainer(BaseModelTrainer):
 def __init__(
 self,
 input_file,
 target_col,
 output_dir,
 task_type,
 random_seed,
 test_file=None,
-**kwargs):
+**kwargs,
+):
 super().__init__(
 input_file,
 target_col,
 output_dir,
 task_type,
 random_seed,
 test_file,
-**kwargs)
+**kwargs,
+)
+# BaseModelTrainer.setup_pycaret will set self.exp appropriately,
+# but we reassign it here for clarity.
 self.exp = RegressionExperiment()
 def save_dashboard(self):
 LOG.info("Saving explainer dashboard")
-dashboard = generate_regression_explainer_dashboard(self.exp,
+dashboard = generate_regression_explainer_dashboard(self.exp, self.best_model)
-self.best_model)
 dashboard.save_html("dashboard.html")
 def generate_plots(self):
 LOG.info("Generating and saving plots")
-plots = ['residuals', 'error', 'cooks',
+plots = [
-'learning', 'vc', 'manifold',
+"residuals",
-'rfe', 'feature', 'feature_all']
+"error",
+"cooks",
+"learning",
+"vc",
+"manifold",
+"rfe",
+"feature",
+"feature_all",
+]
 for plot_name in plots:
 try:
-plot_path = self.exp.plot_model(self.best_model,
+plot_path = self.exp.plot_model(
-plot=plot_name, save=True)
+self.best_model, plot=plot_name, save=True
+)
 self.plots[plot_name] = plot_path
 except Exception as e:
 LOG.error(f"Error generating plot {plot_name}: {e}")
 continue
 X_test = self.exp.X_test_transformed.copy()
 y_test = self.exp.y_test_transformed
 try:
 explainer = RegressionExplainer(self.best_model, X_test, y_test)
-self.expaliner = explainer
-plots_explainer_html = ""
 except Exception as e:
 LOG.error(f"Error creating explainer: {e}")
-self.plots_explainer_html = None
 return
+# --- 1) SHAP mean impact (average absolute SHAP values) ---
 try:
-fig_importance = explainer.plot_importances()
+self.explainer_plots["shap_mean"] = explainer.plot_importances()
-plots_explainer_html += add_plot_to_html(fig_importance)
-plots_explainer_html += add_hr_to_html()
 except Exception as e:
-LOG.error(f"Error generating plot importance: {e}")
+LOG.error(f"Error generating SHAP mean importance: {e}")
+# --- 2) SHAP permutation importance ---
 try:
-fig_importance_permutation = \
+self.explainer_plots["shap_perm"] = explainer.plot_importances_permutation(
-explainer.plot_importances_permutation(
+kind="permutation"
-kind="permutation")
+)
-plots_explainer_html += add_plot_to_html(
-fig_importance_permutation)
-plots_explainer_html += add_hr_to_html()
 except Exception as e:
-LOG.error(f"Error generating plot importance permutation: {e}")
+LOG.error(f"Error generating SHAP permutation importance: {e}")
+# Pre-filter features so we never call PDP or residual-vs-feature on missing cols
+valid_feats = []
+for feat in self.features_name:
+if feat in explainer.X.columns or feat in explainer.onehot_cols:
+valid_feats.append(feat)
+else:
+LOG.warning(f"Skipping feature {feat!r}: not found in explainer data")
+# --- 3) Partial Dependence Plots (PDPs) per feature ---
+for feature in valid_feats:
+try:
+fig_pdp = explainer.plot_pdp(feature)
+self.explainer_plots[f"pdp__{feature}"] = fig_pdp
+except AssertionError as ae:
+LOG.warning(f"PDP AssertionError for {feature!r}: {ae}")
+except Exception as e:
+LOG.error(f"Error generating PDP for {feature}: {e}")
+# --- 4) Predicted vs Actual plot ---
 try:
-for feature in self.features_name:
+self.explainer_plots["predicted_vs_actual"] = explainer.plot_predicted_vs_actual()
-fig_shap = explainer.plot_pdp(feature)
-plots_explainer_html += add_plot_to_html(fig_shap)
-plots_explainer_html += add_hr_to_html()
 except Exception as e:
-LOG.error(f"Error generating plot shap dependence: {e}")
+LOG.error(f"Error generating Predicted vs Actual plot: {e}")
-# try:
+# --- 5) Global residuals distribution ---
-#     for feature in self.features_name:
+try:
-#         fig_interaction = explainer.plot_interaction(col=feature)
+self.explainer_plots["residuals"] = explainer.plot_residuals()
-#         plots_explainer_html += add_plot_to_html(fig_interaction)
+except Exception as e:
-# except Exception as e:
+LOG.error(f"Error generating Residuals plot: {e}")
-#     LOG.error(f"Error generating plot shap interaction: {e}")
-try:
+# --- 6) Residuals vs each feature ---
-for feature in self.features_name:
+for feature in valid_feats:
-fig_interactions_importance = \
+try:
-explainer.plot_interactions_importance(
+fig_res_vs_feat = explainer.plot_residuals_vs_feature(feature)
-col=feature)
+self.explainer_plots[f"residuals_vs_feature__{feature}"] = fig_res_vs_feat
-plots_explainer_html += add_plot_to_html(
+except AssertionError as ae:
-fig_interactions_importance)
+LOG.warning(f"Residuals-vs-feature AssertionError for {feature!r}: {ae}")
-plots_explainer_html += add_hr_to_html()
+except Exception as e:
-except Exception as e:
+LOG.error(f"Error generating Residuals vs {feature}: {e}")
-LOG.error(f"Error generating plot shap summary: {e}")
-# Regression specific plots
-try:
-fig_pred_actual = explainer.plot_predicted_vs_actual()
-plots_explainer_html += add_plot_to_html(fig_pred_actual)
-plots_explainer_html += add_hr_to_html()
-except Exception as e:
-LOG.error(f"Error generating plot prediction vs actual: {e}")
-try:
-fig_residuals = explainer.plot_residuals()
-plots_explainer_html += add_plot_to_html(fig_residuals)
-plots_explainer_html += add_hr_to_html()
-except Exception as e:
-LOG.error(f"Error generating plot residuals: {e}")
-try:
-for feature in self.features_name:
-fig_residuals_vs_feature = \
-explainer.plot_residuals_vs_feature(feature)
-plots_explainer_html += add_plot_to_html(
-fig_residuals_vs_feature)
-plots_explainer_html += add_hr_to_html()
-except Exception as e:
-LOG.error(f"Error generating plot residuals vs feature: {e}")
-self.plots_explainer_html = plots_explainer_html

Mercurial > repos > goeckslab > tabular_learner

comparison pycaret_regression.py @ 4:11fdac5affb3 draft