diff feature_importance.py @ 7:f4cb41f458fd draft default tip

planemo upload for repository https://github.com/goeckslab/gleam commit b430f8b466655878c3bf63b053655fdbf039ddb0
author goeckslab
date Wed, 09 Jul 2025 01:13:01 +0000
parents a32ff7201629
children
line wrap: on
line diff
--- a/feature_importance.py	Wed Jul 02 19:00:03 2025 +0000
+++ b/feature_importance.py	Wed Jul 09 01:13:01 2025 +0000
@@ -120,6 +120,9 @@
             used_features = model.feature_name_
         elif hasattr(model, "booster_") and hasattr(model.booster_, "feature_name"):
             used_features = model.booster_.feature_name()
+        elif hasattr(model, "feature_names_in_"):
+            # scikit-learn's standard attribute for the names of features used during fit
+            used_features = list(model.feature_names_in_)
         else:
             used_features = X_transformed.columns
 
@@ -130,7 +133,14 @@
             plot_X = X_shap
             plot_title = f"SHAP Summary for {model_class_name} (TreeExplainer)"
         else:
-            sampled_X = X_transformed[used_features].sample(100, random_state=42)
+            logging.warning(f"len(X_transformed) = {len(X_transformed)}")
+            max_samples = 100
+            n_samples = min(max_samples, len(X_transformed))
+            sampled_X = X_transformed[used_features].sample(
+                n=n_samples,
+                replace=False,
+                random_state=42
+            )
             explainer = shap.KernelExplainer(model.predict, sampled_X)
             shap_values = explainer.shap_values(sampled_X)
             plot_X = sampled_X