annotate feature_importance.py @ 16:4fee4504646e draft default tip

planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
author goeckslab
date Fri, 28 Nov 2025 22:28:26 +0000
parents e674b9e946fb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1 import base64
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
2 import logging
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
3 import os
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
4
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
5 import matplotlib.pyplot as plt
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
6 import pandas as pd
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
7 import shap
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
8 from pycaret.classification import ClassificationExperiment
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
9 from pycaret.regression import RegressionExperiment
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
10
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
11 logging.basicConfig(level=logging.DEBUG)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
12 LOG = logging.getLogger(__name__)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
13
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
14
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
15 class FeatureImportanceAnalyzer:
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
16 def __init__(
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
17 self,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
18 task_type,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
19 output_dir,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
20 data_path=None,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
21 data=None,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
22 target_col=None,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
23 exp=None,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
24 best_model=None,
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
25 max_plot_features=None,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
26 processed_data=None,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
27 max_shap_rows=None,
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
28 ):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
29 self.task_type = task_type
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
30 self.output_dir = output_dir
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
31 self.exp = exp
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
32 self.best_model = best_model
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
33 self._skip_messages = []
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
34 self.shap_total_features = None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
35 self.shap_used_features = None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
36 if isinstance(max_plot_features, int) and max_plot_features > 0:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
37 self.max_plot_features = max_plot_features
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
38 elif max_plot_features is None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
39 self.max_plot_features = 30
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
40 else:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
41 self.max_plot_features = None
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
42
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
43 if exp is not None:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
44 # Assume all configs (data, target) are in exp
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
45 self.data = exp.dataset.copy()
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
46 self.target = exp.target_param
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
47 LOG.info("Using provided experiment object")
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
48 else:
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
49 if data is not None:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
50 self.data = data
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
51 LOG.info("Data loaded from memory")
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
52 else:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
53 self.target_col = target_col
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
54 self.data = pd.read_csv(data_path, sep=None, engine="python")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
55 self.data.columns = self.data.columns.str.replace(".", "_")
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
56 self.data = self.data.fillna(self.data.median(numeric_only=True))
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
57 self.target = self.data.columns[int(target_col) - 1]
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
58 self.exp = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
59 ClassificationExperiment()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
60 if task_type == "classification"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
61 else RegressionExperiment()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
62 )
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
63 if processed_data is not None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
64 self.data = processed_data
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
65
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
66 self.plots = {}
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
67 self.max_shap_rows = max_shap_rows
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
68
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
69 def _get_feature_names_from_model(self, model):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
70 """Best-effort extraction of feature names seen by the estimator."""
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
71 if model is None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
72 return None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
73
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
74 candidates = [model]
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
75 if hasattr(model, "named_steps"):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
76 candidates.extend(model.named_steps.values())
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
77 elif hasattr(model, "steps"):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
78 candidates.extend(step for _, step in model.steps)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
79
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
80 for candidate in candidates:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
81 names = getattr(candidate, "feature_names_in_", None)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
82 if names is not None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
83 return list(names)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
84 return None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
85
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
86 def _get_transformed_frame(self, model=None, prefer_test=True):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
87 """Return a DataFrame that mirrors the matrix fed to the estimator."""
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
88 key_order = ["X_test_transformed", "X_train_transformed"]
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
89 if not prefer_test:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
90 key_order.reverse()
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
91 key_order.append("X_transformed")
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
92
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
93 feature_names = self._get_feature_names_from_model(model)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
94 for key in key_order:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
95 try:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
96 frame = self.exp.get_config(key)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
97 except KeyError:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
98 continue
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
99 if frame is None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
100 continue
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
101 if isinstance(frame, pd.DataFrame):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
102 return frame.copy()
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
103 try:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
104 n_features = frame.shape[1]
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
105 except Exception:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
106 continue
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
107 if feature_names and len(feature_names) == n_features:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
108 return pd.DataFrame(frame, columns=feature_names)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
109 # Fallback to positional names so downstream logic still works
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
110 return pd.DataFrame(frame, columns=[f"f{i}" for i in range(n_features)])
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
111 return None
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
112
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
113 def setup_pycaret(self):
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
114 if self.exp is not None and hasattr(self.exp, "is_setup") and self.exp.is_setup:
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
115 LOG.info("Experiment already set up. Skipping PyCaret setup.")
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
116 return
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
117 LOG.info("Initializing PyCaret")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
118 setup_params = {
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
119 "target": self.target,
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
120 "session_id": 123,
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
121 "html": True,
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
122 "log_experiment": False,
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
123 "system_log": False,
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
124 }
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
125 self.exp.setup(self.data, **setup_params)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
126
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
127 def save_tree_importance(self):
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
128 model = self.best_model or self.exp.get_config("best_model")
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
129 processed_frame = self._get_transformed_frame(model, prefer_test=False)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
130 if processed_frame is None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
131 LOG.warning(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
132 "Unable to determine transformed feature names; skipping tree importance plot."
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
133 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
134 self.tree_model_name = None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
135 return
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
136 processed_features = list(processed_frame.columns)
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
137
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
138 importances = None
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
139 model_type = model.__class__.__name__
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
140 self.tree_model_name = model_type
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
141
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
142 if hasattr(model, "feature_importances_"):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
143 importances = model.feature_importances_
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
144 elif hasattr(model, "coef_"):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
145 importances = abs(model.coef_).flatten()
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
146 else:
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
147 LOG.warning(
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
148 f"Model {model_type} does not have feature_importances_ or coef_. Skipping tree importance."
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
149 )
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
150 self.tree_model_name = None
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
151 return
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
152
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
153 if len(importances) != len(processed_features):
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
154 model_feature_names = self._get_feature_names_from_model(model)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
155 if model_feature_names and len(model_feature_names) == len(importances):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
156 processed_features = model_feature_names
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
157 else:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
158 LOG.warning(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
159 "Importances (%s) != features (%s). Skipping tree importance.",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
160 len(importances),
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
161 len(processed_features),
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
162 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
163 self.tree_model_name = None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
164 return
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
165
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
166 feature_importances = pd.DataFrame(
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
167 {"Feature": processed_features, "Importance": importances}
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
168 ).sort_values(by="Importance", ascending=False)
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
169 cap = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
170 min(self.max_plot_features, len(feature_importances))
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
171 if self.max_plot_features is not None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
172 else len(feature_importances)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
173 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
174 plot_importances = feature_importances.head(cap)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
175 if cap < len(feature_importances):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
176 LOG.info(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
177 "Tree importance plot limited to top %s of %s features",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
178 cap,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
179 len(feature_importances),
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
180 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
181 plt.figure(figsize=(10, 6))
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
182 plt.barh(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
183 plot_importances["Feature"],
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
184 plot_importances["Importance"],
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
185 )
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
186 plt.xlabel("Importance")
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
187 plt.title(f"Feature Importance ({model_type}) (top {cap})")
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
188 plot_path = os.path.join(self.output_dir, "tree_importance.png")
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
189 plt.tight_layout()
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
190 plt.savefig(plot_path, bbox_inches="tight")
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
191 plt.close()
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
192 self.plots["tree_importance"] = plot_path
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
193
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
194 def save_shap_values(self, max_samples=None, max_display=None, max_features=None):
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
195 model = self.best_model or self.exp.get_config("best_model")
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
196
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
197 X_data = self._get_transformed_frame(model)
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
198 if X_data is None:
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
199 raise RuntimeError("No transformed dataset found for SHAP.")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
200
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
201 n_rows, n_features = X_data.shape
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
202 self.shap_total_features = n_features
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
203 feature_cap = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
204 min(self.max_plot_features, n_features)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
205 if self.max_plot_features is not None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
206 else n_features
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
207 )
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
208 if max_features is None:
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
209 max_features = feature_cap
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
210 else:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
211 max_features = min(max_features, feature_cap)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
212 display_features = list(X_data.columns)
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
213
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
214 try:
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
215 if hasattr(model, "feature_importances_"):
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
216 importances = pd.Series(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
217 model.feature_importances_, index=X_data.columns
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
218 )
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
219 top_features = importances.nlargest(max_features).index
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
220 elif hasattr(model, "coef_"):
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
221 coef = abs(model.coef_).flatten()
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
222 importances = pd.Series(coef, index=X_data.columns)
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
223 top_features = importances.nlargest(max_features).index
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
224 else:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
225 variances = X_data.var()
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
226 top_features = variances.nlargest(max_features).index
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
227
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
228 candidate_features = list(top_features)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
229 missing = [f for f in candidate_features if f not in X_data.columns]
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
230 display_features = [f for f in candidate_features if f in X_data.columns]
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
231 if missing:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
232 LOG.warning(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
233 "Dropping %s transformed feature(s) not present in SHAP frame: %s",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
234 len(missing),
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
235 missing[:5],
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
236 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
237 if display_features and len(display_features) < n_features:
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
238 LOG.info(
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
239 "Restricting SHAP display to top %s of %s features",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
240 len(display_features),
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
241 n_features,
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
242 )
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
243 elif not display_features:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
244 display_features = list(X_data.columns)
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
245 except Exception as e:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
246 LOG.warning(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
247 f"Feature limiting failed: {e}. Using all {n_features} features."
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
248 )
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
249 display_features = list(X_data.columns)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
250
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
251 self.shap_used_features = len(display_features)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
252
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
253 # Apply the column restriction so SHAP only runs on the selected features.
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
254 if display_features:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
255 X_data = X_data[display_features]
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
256 n_rows, n_features = X_data.shape
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
257
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
258 # --- Adaptive row subsampling ---
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
259 if max_samples is None:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
260 if n_rows <= 500:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
261 max_samples = n_rows
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
262 elif n_rows <= 5000:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
263 max_samples = 500
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
264 else:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
265 max_samples = min(1000, int(n_rows * 0.1))
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
266
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
267 if self.max_shap_rows is not None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
268 max_samples = min(max_samples, self.max_shap_rows)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
269
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
270 if n_rows > max_samples:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
271 LOG.info(f"Subsampling SHAP rows: {max_samples} of {n_rows}")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
272 X_data = X_data.sample(max_samples, random_state=42)
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
273
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
274 # --- Adaptive feature display ---
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
275 display_cap = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
276 min(self.max_plot_features, len(display_features))
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
277 if self.max_plot_features is not None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
278 else len(display_features)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
279 )
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
280 if max_display is None:
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
281 max_display = display_cap
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
282 else:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
283 max_display = min(max_display, display_cap)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
284 if not display_features:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
285 display_features = list(X_data.columns)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
286 max_display = len(display_features)
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
287
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
288 # Background set
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
289 bg = X_data.sample(min(len(X_data), 100), random_state=42)
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
290 predict_fn = (
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
291 model.predict_proba if hasattr(model, "predict_proba") else model.predict
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
292 )
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
293
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
294 # Optimized explainer
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
295 explainer = None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
296 explainer_label = None
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
297 if hasattr(model, "feature_importances_"):
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
298 explainer = shap.TreeExplainer(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
299 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
300 )
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
301 explainer_label = "tree_path_dependent"
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
302 elif hasattr(model, "coef_"):
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
303 explainer = shap.LinearExplainer(model, bg)
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
304 explainer_label = "linear"
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
305 else:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
306 explainer = shap.Explainer(predict_fn, bg)
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
307 explainer_label = explainer.__class__.__name__
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
308
11
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
309 try:
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
310 shap_values = explainer(X_data)
11
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
311 self.shap_model_name = explainer.__class__.__name__
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
312 except Exception as e:
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
313 error_message = str(e)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
314 needs_tree_fallback = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
315 hasattr(model, "feature_importances_")
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
316 and "does not cover all the leaves" in error_message.lower()
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
317 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
318 feature_name_mismatch = "feature names should match" in error_message.lower()
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
319 if needs_tree_fallback:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
320 LOG.warning(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
321 "SHAP computation failed using '%s' perturbation (%s). "
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
322 "Retrying with interventional perturbation.",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
323 explainer_label,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
324 error_message,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
325 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
326 try:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
327 explainer = shap.TreeExplainer(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
328 model,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
329 bg,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
330 feature_perturbation="interventional",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
331 n_jobs=-1,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
332 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
333 shap_values = explainer(X_data)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
334 self.shap_model_name = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
335 f"{explainer.__class__.__name__} (interventional)"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
336 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
337 except Exception as retry_exc:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
338 LOG.error(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
339 "SHAP computation failed even after fallback: %s",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
340 retry_exc,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
341 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
342 self.shap_model_name = None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
343 return
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
344 elif feature_name_mismatch:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
345 LOG.warning(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
346 "SHAP computation failed due to feature-name mismatch (%s). "
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
347 "Falling back to model-agnostic SHAP explainer.",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
348 error_message,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
349 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
350 try:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
351 agnostic_explainer = shap.Explainer(predict_fn, bg)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
352 shap_values = agnostic_explainer(X_data)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
353 self.shap_model_name = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
354 f"{agnostic_explainer.__class__.__name__} (fallback)"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
355 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
356 except Exception as fallback_exc:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
357 LOG.error(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
358 "Model-agnostic SHAP fallback also failed: %s",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
359 fallback_exc,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
360 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
361 self.shap_model_name = None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
362 return
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
363 else:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
364 LOG.error(f"SHAP computation failed: {e}")
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
365 self.shap_model_name = None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
366 return
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
367
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
368 def _limit_explanation_features(explanation):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
369 if len(display_features) >= n_features:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
370 return explanation
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
371 try:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
372 limited = explanation[:, display_features]
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
373 LOG.info(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
374 "SHAP explanation trimmed to %s display features.",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
375 len(display_features),
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
376 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
377 return limited
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
378 except Exception as exc:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
379 LOG.warning(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
380 "Failed to restrict SHAP explanation to top features "
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
381 "(sample=%s); plot will include all features. Error: %s",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
382 display_features[:5],
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
383 exc,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
384 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
385 # Keep using full feature list if trimming fails
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
386 return explanation
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
387
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
388 shap_shape = getattr(shap_values, "shape", None)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
389 class_labels = list(getattr(model, "classes_", []))
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
390 shap_outputs = []
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
391 if shap_shape is not None and len(shap_shape) == 3:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
392 output_count = shap_shape[2]
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
393 LOG.info("Detected multi-output SHAP explanation with %s classes.", output_count)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
394 for class_idx in range(output_count):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
395 try:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
396 class_expl = shap_values[..., class_idx]
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
397 except Exception as exc:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
398 LOG.warning(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
399 "Failed to extract SHAP explanation for class index %s: %s",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
400 class_idx,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
401 exc,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
402 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
403 continue
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
404 label = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
405 class_labels[class_idx]
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
406 if class_labels and class_idx < len(class_labels)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
407 else class_idx
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
408 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
409 shap_outputs.append((class_idx, label, class_expl))
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
410 else:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
411 shap_outputs.append((None, None, shap_values))
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
412
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
413 if not shap_outputs:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
414 LOG.error("No SHAP outputs available for plotting.")
11
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
415 self.shap_model_name = None
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
416 return
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
417
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
418 # --- Plot SHAP summary (one per class if needed) ---
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
419 for class_idx, class_label, class_expl in shap_outputs:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
420 expl_to_plot = _limit_explanation_features(class_expl)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
421 suffix = ""
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
422 plot_key = "shap_summary"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
423 if class_idx is not None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
424 safe_label = str(class_label).replace("/", "_").replace(" ", "_")
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
425 suffix = f"_class_{safe_label}"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
426 plot_key = f"shap_summary_class_{safe_label}"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
427 out_filename = f"shap_summary{suffix}.png"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
428 out_path = os.path.join(self.output_dir, out_filename)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
429 plt.figure()
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
430 shap.plots.beeswarm(expl_to_plot, max_display=max_display, show=False)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
431 title = f"SHAP Summary for {model.__class__.__name__}"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
432 if class_idx is not None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
433 title += f" (class {class_label})"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
434 plt.title(f"{title} (top {max_display} features)")
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
435 plt.tight_layout()
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
436 plt.savefig(out_path, bbox_inches="tight")
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
437 plt.close()
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
438 self.plots[plot_key] = out_path
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
439
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
440 # --- Log summary ---
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
441 LOG.info(
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
442 "SHAP summary completed with %s rows and %s features "
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
443 "(displaying top %s) across %s output(s).",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
444 X_data.shape[0],
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
445 X_data.shape[1],
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
446 max_display,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
447 len(shap_outputs),
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
448 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
449
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
450 def generate_html_report(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
451 LOG.info("Generating HTML report")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
452 plots_html = ""
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
453 for plot_name, plot_path in self.plots.items():
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
454 if plot_name == "tree_importance" and not getattr(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
455 self, "tree_model_name", None
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
456 ):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
457 continue
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
458 encoded_image = self.encode_image_to_base64(plot_path)
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
459 if plot_name == "tree_importance" and getattr(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
460 self, "tree_model_name", None
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
461 ):
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
462 section_title = f"Feature importance from {self.tree_model_name}"
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
463 elif plot_name == "shap_summary":
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
464 section_title = (
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
465 f"SHAP Summary from {getattr(self, 'shap_model_name', 'model')}"
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
466 )
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
467 elif plot_name.startswith("shap_summary_class_"):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
468 class_label = plot_name.replace("shap_summary_class_", "")
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
469 section_title = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
470 f"SHAP Summary for class {class_label} "
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
471 f"({getattr(self, 'shap_model_name', 'model')})"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
472 )
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
473 else:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
474 section_title = plot_name
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
475 plots_html += f"""
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
476 <div class="plot" id="{plot_name}" style="text-align:center;margin-bottom:24px;">
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
477 <h2>{section_title}</h2>
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
478 <img src="data:image/png;base64,{encoded_image}" alt="{plot_name}"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 12
diff changeset
479 style="max-width:95%;height:auto;display:block;margin:0 auto;border:1px solid #ddd;padding:8px;background:#fff;">
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
480 </div>
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
481 """
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
482 return f"{plots_html}"
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
483
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
484 def encode_image_to_base64(self, img_path):
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
485 with open(img_path, "rb") as img_file:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
486 return base64.b64encode(img_file.read()).decode("utf-8")
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
487
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
488 def run(self):
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
489 if (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
490 self.exp is None
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
491 or not hasattr(self.exp, "is_setup")
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
492 or not self.exp.is_setup
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
493 ):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
494 self.setup_pycaret()
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
495 self.save_tree_importance()
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
496 self.save_shap_values()
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
497 return self.generate_html_report()