annotate feature_importance.py @ 13:bf0df21a1ea3 draft default tip

planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
author goeckslab
date Sat, 06 Dec 2025 14:20:23 +0000
parents 15707141e7da
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1 import base64
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
2 import logging
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
3 import os
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
4
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
5 import matplotlib.pyplot as plt
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
6 import pandas as pd
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
7 import shap
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
8 from pycaret.classification import ClassificationExperiment
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
9 from pycaret.regression import RegressionExperiment
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
10
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
11 logging.basicConfig(level=logging.DEBUG)
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
12 LOG = logging.getLogger(__name__)
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
13
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
14
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
15 class FeatureImportanceAnalyzer:
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
def __init__(
    self,
    task_type,
    output_dir,
    data_path=None,
    data=None,
    target_col=None,
    exp=None,
    best_model=None,
    max_plot_features=None,
    processed_data=None,
    max_shap_rows=None,
):
    """Gather the dataset, target column and experiment used for analysis.

    Args:
        task_type: ``"classification"`` selects ``ClassificationExperiment``;
            any other value selects ``RegressionExperiment``. Only consulted
            when ``exp`` is not supplied.
        output_dir: Stored unchanged on the instance.
        data_path: Delimited text file read with pandas (separator is
            sniffed) when neither ``exp`` nor ``data`` is supplied.
        data: In-memory dataset used as-is when ``exp`` is not supplied.
        target_col: 1-based position of the target column in the dataset.
            Needed whenever ``exp`` is not supplied.
        exp: Existing experiment. When given, a copy of ``exp.dataset`` and
            ``exp.target_param`` are used and no new experiment is created.
        best_model: Stored unchanged on the instance.
        max_plot_features: A positive int is kept as the plotting cap,
            ``None`` falls back to 30, and any other value is stored as
            ``None``.
        processed_data: When given, replaces ``self.data`` after the
            dataset has been resolved.
        max_shap_rows: Stored unchanged on the instance.
    """
    self.task_type = task_type
    self.output_dir = output_dir
    self.exp = exp
    self.best_model = best_model
    # Defined on every construction path, not only when a CSV is read.
    self.target_col = target_col
    self._skip_messages = []
    self.shap_total_features = None
    self.shap_used_features = None

    if isinstance(max_plot_features, int) and max_plot_features > 0:
        self.max_plot_features = max_plot_features
    elif max_plot_features is None:
        self.max_plot_features = 30
    else:
        self.max_plot_features = None

    if exp is not None:
        # Assume all configs (data, target) are in exp
        self.data = exp.dataset.copy()
        self.target = exp.target_param
        LOG.info("Using provided experiment object")
    else:
        if data is not None:
            self.data = data
            LOG.info("Data loaded from memory")
        else:
            self.data = pd.read_csv(data_path, sep=None, engine="python")
            # regex=False: "." must be replaced literally. Without it the
            # result depends on the pandas version's default for `regex`.
            self.data.columns = self.data.columns.str.replace(
                ".", "_", regex=False
            )
            # Only numeric columns have a median, so only they are imputed.
            self.data = self.data.fillna(self.data.median(numeric_only=True))
        # target_col counts from 1; column positions count from 0.
        self.target = self.data.columns[int(target_col) - 1]
        self.exp = (
            ClassificationExperiment()
            if task_type == "classification"
            else RegressionExperiment()
        )
    if processed_data is not None:
        self.data = processed_data

    self.plots = {}
    self.max_shap_rows = max_shap_rows
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
68
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
69 def _get_feature_names_from_model(self, model):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
70 """Best-effort extraction of feature names seen by the estimator."""
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
71 if model is None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
72 return None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
73
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
74 candidates = [model]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
75 if hasattr(model, "named_steps"):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
76 candidates.extend(model.named_steps.values())
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
77 elif hasattr(model, "steps"):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
78 candidates.extend(step for _, step in model.steps)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
79
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
80 for candidate in candidates:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
81 names = getattr(candidate, "feature_names_in_", None)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
82 if names is not None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
83 return list(names)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
84 return None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
85
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
86 def _get_transformed_frame(self, model=None, prefer_test=True):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
87 """Return a DataFrame that mirrors the matrix fed to the estimator."""
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
88 key_order = ["X_test_transformed", "X_train_transformed"]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
89 if not prefer_test:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
90 key_order.reverse()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
91 key_order.append("X_transformed")
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
92
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
93 feature_names = self._get_feature_names_from_model(model)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
94 for key in key_order:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
95 try:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
96 frame = self.exp.get_config(key)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
97 except KeyError:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
98 continue
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
99 if frame is None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
100 continue
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
101 if isinstance(frame, pd.DataFrame):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
102 return frame.copy()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
103 try:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
104 n_features = frame.shape[1]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
105 except Exception:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
106 continue
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
107 if feature_names and len(feature_names) == n_features:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
108 return pd.DataFrame(frame, columns=feature_names)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
109 # Fallback to positional names so downstream logic still works
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
110 return pd.DataFrame(frame, columns=[f"f{i}" for i in range(n_features)])
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
111 return None
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
112
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
def setup_pycaret(self):
    """Call the experiment's ``setup`` on ``self.data`` unless already done.

    An experiment exposing a truthy ``is_setup`` attribute is left alone.
    Otherwise ``setup`` is invoked with ``self.target`` as the target, a
    fixed ``session_id`` of 123, ``html`` enabled, and both experiment
    logging and system logging disabled.
    """
    experiment = self.exp
    if experiment is not None and getattr(experiment, "is_setup", False):
        LOG.info("Experiment already set up. Skipping PyCaret setup.")
        return

    LOG.info("Initializing PyCaret")
    options = dict(
        target=self.target,
        session_id=123,
        html=True,
        log_experiment=False,
        system_log=False,
    )
    self.exp.setup(self.data, **options)
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
126
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
127 def save_tree_importance(self):
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
128 model = self.best_model or self.exp.get_config("best_model")
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
129 processed_frame = self._get_transformed_frame(model, prefer_test=False)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
130 if processed_frame is None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
131 LOG.warning(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
132 "Unable to determine transformed feature names; skipping tree importance plot."
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
133 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
134 self.tree_model_name = None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
135 return
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
136 processed_features = list(processed_frame.columns)
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
137
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
138 importances = None
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
139 model_type = model.__class__.__name__
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
140 self.tree_model_name = model_type
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
141
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
142 if hasattr(model, "feature_importances_"):
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
143 importances = model.feature_importances_
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
144 elif hasattr(model, "coef_"):
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
145 importances = abs(model.coef_).flatten()
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
146 else:
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
147 LOG.warning(
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
148 f"Model {model_type} does not have feature_importances_ or coef_. Skipping tree importance."
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
149 )
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
150 self.tree_model_name = None
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
151 return
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
152
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
153 if len(importances) != len(processed_features):
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
154 model_feature_names = self._get_feature_names_from_model(model)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
155 if model_feature_names and len(model_feature_names) == len(importances):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
156 processed_features = model_feature_names
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
157 else:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
158 LOG.warning(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
159 "Importances (%s) != features (%s). Skipping tree importance.",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
160 len(importances),
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
161 len(processed_features),
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
162 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
163 self.tree_model_name = None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
164 return
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
165
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
166 feature_importances = pd.DataFrame(
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
167 {"Feature": processed_features, "Importance": importances}
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
168 ).sort_values(by="Importance", ascending=False)
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
169 cap = (
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
170 min(self.max_plot_features, len(feature_importances))
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
171 if self.max_plot_features is not None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
172 else len(feature_importances)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
173 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
174 plot_importances = feature_importances.head(cap)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
175 if cap < len(feature_importances):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
176 LOG.info(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
177 "Tree importance plot limited to top %s of %s features",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
178 cap,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
179 len(feature_importances),
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
180 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
181 plt.figure(figsize=(10, 6))
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
182 plt.barh(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
183 plot_importances["Feature"],
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
184 plot_importances["Importance"],
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
185 )
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
186 plt.xlabel("Importance")
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
187 plt.title(f"Feature Importance ({model_type}) (top {cap})")
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
188 plot_path = os.path.join(self.output_dir, "tree_importance.png")
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
189 plt.tight_layout()
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
190 plt.savefig(plot_path, bbox_inches="tight")
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
191 plt.close()
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
192 self.plots["tree_importance"] = plot_path
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
193
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
194 def save_shap_values(self, max_samples=None, max_display=None, max_features=None):
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
195 model = self.best_model or self.exp.get_config("best_model")
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
196
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
197 X_data = self._get_transformed_frame(model)
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
198 if X_data is None:
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
199 raise RuntimeError("No transformed dataset found for SHAP.")
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
200
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
201 n_rows, n_features = X_data.shape
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
202 self.shap_total_features = n_features
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
203 feature_cap = (
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
204 min(self.max_plot_features, n_features)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
205 if self.max_plot_features is not None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
206 else n_features
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
207 )
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
208 if max_features is None:
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
209 max_features = feature_cap
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
210 else:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
211 max_features = min(max_features, feature_cap)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
212 display_features = list(X_data.columns)
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
213
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
214 try:
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
215 if hasattr(model, "feature_importances_"):
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
216 importances = pd.Series(
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
217 model.feature_importances_, index=X_data.columns
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
218 )
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
219 top_features = importances.nlargest(max_features).index
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
220 elif hasattr(model, "coef_"):
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
221 coef = abs(model.coef_).flatten()
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
222 importances = pd.Series(coef, index=X_data.columns)
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
223 top_features = importances.nlargest(max_features).index
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
224 else:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
225 variances = X_data.var()
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
226 top_features = variances.nlargest(max_features).index
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
227
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
228 candidate_features = list(top_features)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
229 missing = [f for f in candidate_features if f not in X_data.columns]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
230 display_features = [f for f in candidate_features if f in X_data.columns]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
231 if missing:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
232 LOG.warning(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
233 "Dropping %s transformed feature(s) not present in SHAP frame: %s",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
234 len(missing),
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
235 missing[:5],
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
236 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
237 if display_features and len(display_features) < n_features:
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
238 LOG.info(
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
239 "Restricting SHAP display to top %s of %s features",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
240 len(display_features),
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
241 n_features,
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
242 )
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
243 elif not display_features:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
244 display_features = list(X_data.columns)
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
245 except Exception as e:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
246 LOG.warning(
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
247 f"Feature limiting failed: {e}. Using all {n_features} features."
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
248 )
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
249 display_features = list(X_data.columns)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
250
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
251 self.shap_used_features = len(display_features)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
252
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
253 # Apply the column restriction so SHAP only runs on the selected features.
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
254 if display_features:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
255 X_data = X_data[display_features]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
256 n_rows, n_features = X_data.shape
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
257
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
258 # --- Adaptive row subsampling ---
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
259 if max_samples is None:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
260 if n_rows <= 500:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
261 max_samples = n_rows
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
262 elif n_rows <= 5000:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
263 max_samples = 500
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
264 else:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
265 max_samples = min(1000, int(n_rows * 0.1))
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
266
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
267 if self.max_shap_rows is not None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
268 max_samples = min(max_samples, self.max_shap_rows)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
269
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
270 if n_rows > max_samples:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
271 LOG.info(f"Subsampling SHAP rows: {max_samples} of {n_rows}")
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
272 X_data = X_data.sample(max_samples, random_state=42)
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
273
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
274 # --- Adaptive feature display ---
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
275 display_cap = (
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
276 min(self.max_plot_features, len(display_features))
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
277 if self.max_plot_features is not None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
278 else len(display_features)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
279 )
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
280 if max_display is None:
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
281 max_display = display_cap
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
282 else:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
283 max_display = min(max_display, display_cap)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
284 if not display_features:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
285 display_features = list(X_data.columns)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
286 max_display = len(display_features)
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
287
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
288 # Background set
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
289 bg = X_data.sample(min(len(X_data), 100), random_state=42)
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
290 predict_fn = self._get_predict_fn(model)
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
291
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
292 # Optimized explainer based on model type
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
293 explainer, explainer_label, tree_based = self._choose_shap_explainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
294 model, bg, predict_fn
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
295 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
296 if explainer is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
297 LOG.warning("No suitable SHAP explainer for model %s; skipping SHAP.", model)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
298 self.shap_model_name = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
299 return
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
300
7
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
301 try:
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
302 shap_values = explainer(X_data)
7
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
303 self.shap_model_name = explainer.__class__.__name__
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
304 except Exception as e:
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
305 error_message = str(e)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
306 needs_tree_fallback = (
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
307 tree_based
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
308 and "does not cover all the leaves" in error_message.lower()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
309 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
310 feature_name_mismatch = "feature names should match" in error_message.lower()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
311 if needs_tree_fallback:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
312 LOG.warning(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
313 "SHAP computation failed using '%s' perturbation (%s). "
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
314 "Retrying with interventional perturbation.",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
315 explainer_label,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
316 error_message,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
317 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
318 try:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
319 explainer = shap.TreeExplainer(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
320 model,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
321 bg,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
322 feature_perturbation="interventional",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
323 n_jobs=-1,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
324 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
325 shap_values = explainer(X_data)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
326 self.shap_model_name = (
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
327 f"{explainer.__class__.__name__} (interventional)"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
328 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
329 except Exception as retry_exc:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
330 LOG.error(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
331 "SHAP computation failed even after fallback: %s",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
332 retry_exc,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
333 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
334 self.shap_model_name = None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
335 return
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
336 elif feature_name_mismatch:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
337 LOG.warning(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
338 "SHAP computation failed due to feature-name mismatch (%s). "
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
339 "Falling back to model-agnostic SHAP explainer.",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
340 error_message,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
341 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
342 try:
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
343 agnostic_explainer = shap.Explainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
344 predict_fn, bg, algorithm="permutation"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
345 )
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
346 shap_values = agnostic_explainer(X_data)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
347 self.shap_model_name = (
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
348 f"{agnostic_explainer.__class__.__name__} (fallback)"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
349 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
350 except Exception as fallback_exc:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
351 LOG.error(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
352 "Model-agnostic SHAP fallback also failed: %s",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
353 fallback_exc,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
354 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
355 self.shap_model_name = None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
356 return
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
357 else:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
358 LOG.error(f"SHAP computation failed: {e}")
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
359 self.shap_model_name = None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
360 return
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
361
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
362 def _limit_explanation_features(explanation):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
363 if len(display_features) >= n_features:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
364 return explanation
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
365 try:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
366 limited = explanation[:, display_features]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
367 LOG.info(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
368 "SHAP explanation trimmed to %s display features.",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
369 len(display_features),
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
370 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
371 return limited
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
372 except Exception as exc:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
373 LOG.warning(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
374 "Failed to restrict SHAP explanation to top features "
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
375 "(sample=%s); plot will include all features. Error: %s",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
376 display_features[:5],
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
377 exc,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
378 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
379 # Keep using full feature list if trimming fails
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
380 return explanation
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
381
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
382 shap_shape = getattr(shap_values, "shape", None)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
383 class_labels = list(getattr(model, "classes_", []))
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
384 shap_outputs = []
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
385 if shap_shape is not None and len(shap_shape) == 3:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
386 output_count = shap_shape[2]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
387 LOG.info("Detected multi-output SHAP explanation with %s classes.", output_count)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
388 for class_idx in range(output_count):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
389 try:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
390 class_expl = shap_values[..., class_idx]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
391 except Exception as exc:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
392 LOG.warning(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
393 "Failed to extract SHAP explanation for class index %s: %s",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
394 class_idx,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
395 exc,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
396 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
397 continue
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
398 label = (
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
399 class_labels[class_idx]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
400 if class_labels and class_idx < len(class_labels)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
401 else class_idx
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
402 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
403 shap_outputs.append((class_idx, label, class_expl))
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
404 else:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
405 shap_outputs.append((None, None, shap_values))
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
406
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
407 if not shap_outputs:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
408 LOG.error("No SHAP outputs available for plotting.")
7
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
409 self.shap_model_name = None
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
410 return
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
411
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
412 # --- Plot SHAP summary (one per class if needed) ---
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
413 for class_idx, class_label, class_expl in shap_outputs:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
414 expl_to_plot = _limit_explanation_features(class_expl)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
415 suffix = ""
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
416 plot_key = "shap_summary"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
417 if class_idx is not None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
418 safe_label = str(class_label).replace("/", "_").replace(" ", "_")
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
419 suffix = f"_class_{safe_label}"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
420 plot_key = f"shap_summary_class_{safe_label}"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
421 out_filename = f"shap_summary{suffix}.png"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
422 out_path = os.path.join(self.output_dir, out_filename)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
423 plt.figure()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
424 shap.plots.beeswarm(expl_to_plot, max_display=max_display, show=False)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
425 title = f"SHAP Summary for {model.__class__.__name__}"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
426 if class_idx is not None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
427 title += f" (class {class_label})"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
428 plt.title(f"{title} (top {max_display} features)")
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
429 plt.tight_layout()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
430 plt.savefig(out_path, bbox_inches="tight")
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
431 plt.close()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
432 self.plots[plot_key] = out_path
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
433
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
434 # --- Log summary ---
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
435 LOG.info(
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
436 "SHAP summary completed with %s rows and %s features "
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
437 "(displaying top %s) across %s output(s).",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
438 X_data.shape[0],
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
439 X_data.shape[1],
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
440 max_display,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 8
diff changeset
441 len(shap_outputs),
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
442 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
443
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
def generate_html_report(self):
    """Build the HTML fragment that embeds every generated plot.

    Iterates over ``self.plots`` (plot name -> image path), encodes each
    image with ``self.encode_image_to_base64`` and wraps it in a
    ``<div class="plot">`` section with a heading.

    The ``tree_importance`` plot is skipped when ``self.tree_model_name``
    is missing or falsy. SHAP plots are titled with ``self.shap_model_name``,
    falling back to the literal ``"model"`` when that attribute is missing
    or ``None``.

    Returns:
        str: the concatenated HTML for all plot sections (empty string when
        there is nothing to show).
    """
    # Function-scope import keeps this block self-contained; plot names can
    # carry class labels derived from data, so they must be escaped before
    # being placed into HTML attributes or text.
    import html

    LOG.info("Generating HTML report")

    class_prefix = "shap_summary_class_"
    tree_model_name = getattr(self, "tree_model_name", None)
    # `or "model"` also covers the attribute being present but set to None,
    # which would otherwise render as the text "None" in the heading.
    shap_model_name = getattr(self, "shap_model_name", None) or "model"

    sections = []
    for plot_name, plot_path in self.plots.items():
        if plot_name == "tree_importance":
            if not tree_model_name:
                # No tree model was recorded, so the plot has no meaning.
                continue
            section_title = f"Feature importance from {tree_model_name}"
        elif plot_name == "shap_summary":
            section_title = f"SHAP Summary from {shap_model_name}"
        elif plot_name.startswith(class_prefix):
            # Strip only the leading prefix; the label itself may contain
            # the same text again.
            class_label = plot_name[len(class_prefix):]
            section_title = (
                f"SHAP Summary for class {class_label} "
                f"({shap_model_name})"
            )
        else:
            section_title = plot_name

        encoded_image = self.encode_image_to_base64(plot_path)
        safe_name = html.escape(str(plot_name), quote=True)
        safe_title = html.escape(str(section_title), quote=True)
        sections.append(
            f"""
            <div class="plot" id="{safe_name}" style="text-align:center;margin-bottom:24px;">
                <h2>{safe_title}</h2>
                <img src="data:image/png;base64,{encoded_image}" alt="{safe_name}"
                     style="max-width:95%;height:auto;display:block;margin:0 auto;border:1px solid #ddd;padding:8px;background:#fff;">
            </div>
            """
        )
    return "".join(sections)
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
477
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
def encode_image_to_base64(self, img_path):
    """Return the file at ``img_path`` as a base64-encoded text string.

    The file is opened in binary mode and read in full, and the base64
    bytes are decoded to ``str`` so the result can be embedded directly in
    a ``data:`` URI.

    Args:
        img_path: path of the file to read.

    Returns:
        str: base64 representation of the file's bytes.

    Raises:
        OSError: propagated from ``open`` if the file cannot be read.
    """
    with open(img_path, "rb") as img_file:
        return base64.b64encode(img_file.read()).decode("utf-8")
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
481
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
482 def _get_predict_fn(self, model):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
483 if hasattr(model, "predict_proba"):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
484 return model.predict_proba
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
485 if hasattr(model, "decision_function"):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
486 return model.decision_function
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
487 return model.predict
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
488
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
489 def _choose_shap_explainer(self, model, bg, predict_fn):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
490 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
491 Select a SHAP explainer following the prescribed priority order for
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
492 algorithms. Returns (explainer, label, is_tree_based).
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
493 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
494 if model is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
495 return None, None, False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
496
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
497 name = model.__class__.__name__
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
498 lname = name.lower()
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
499 task = getattr(self, "task_type", None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
500
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
501 def _permutation(fn):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
502 return shap.Explainer(fn, bg, algorithm="permutation")
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
503
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
504 if task == "classification":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
505 # 1) Logistic Regression
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
506 if "logisticregression" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
507 return _permutation(model.predict_proba), "permutation-proba", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
508
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
509 # 2) Ridge Classifier
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
510 if "ridgeclassifier" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
511 fn = (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
512 model.decision_function
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
513 if hasattr(model, "decision_function")
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
514 else predict_fn
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
515 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
516 return _permutation(fn), "permutation-decision_function", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
517
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
518 # 3) LDA
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
519 if "lineardiscriminantanalysis" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
520 return _permutation(model.predict_proba), "permutation-proba", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
521
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
522 # 4) Random Forest
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
523 if "randomforestclassifier" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
524 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
525 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
526 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
527 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
528 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
529 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
530 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
531
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
532 # 5) Gradient Boosting
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
533 if "gradientboostingclassifier" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
534 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
535 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
536 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
537 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
538 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
539 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
540 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
541
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
542 # 6) AdaBoost
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
543 if "adaboostclassifier" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
544 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
545 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
546 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
547 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
548 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
549 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
550 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
551
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
552 # 7) Extra Trees
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
553 if "extratreesclassifier" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
554 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
555 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
556 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
557 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
558 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
559 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
560 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
561
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
562 # 8) LightGBM
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
563 if "lgbmclassifier" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
564 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
565 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
566 model,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
567 bg,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
568 model_output="raw",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
569 feature_perturbation="tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
570 n_jobs=-1,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
571 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
572 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
573 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
574 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
575
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
576 # 9) XGBoost
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
577 if "xgbclassifier" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
578 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
579 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
580 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
581 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
582 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
583 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
584 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
585
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
586 # 10) CatBoost (classifier)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
587 if "catboost" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
588 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
589 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
590 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
591 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
592 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
593 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
594 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
595
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
596 # 11) KNN
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
597 if "kneighborsclassifier" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
598 return _permutation(model.predict_proba), "permutation-proba", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
599
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
600 # 12) SVM - linear kernel
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
601 if "svc" in lname or "svm" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
602 kernel = getattr(model, "kernel", None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
603 if kernel == "linear":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
604 return shap.LinearExplainer(model, bg), "linear", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
605 return _permutation(predict_fn), "permutation-svm", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
606
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
607 # 13) Decision Tree
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
608 if "decisiontreeclassifier" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
609 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
610 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
611 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
612 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
613 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
614 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
615 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
616
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
617 # 14) Naive Bayes
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
618 if "naive_bayes" in lname or lname.endswith("nb"):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
619 fn = model.predict_proba if hasattr(model, "predict_proba") else predict_fn
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
620 return _permutation(fn), "permutation-proba", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
621
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
622 # 15) QDA
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
623 if "quadraticdiscriminantanalysis" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
624 return _permutation(model.predict_proba), "permutation-proba", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
625
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
626 # 16) Dummy
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
627 if "dummyclassifier" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
628 return None, None, False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
629
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
630 # Default classification: permutation on predict_fn
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
631 return _permutation(predict_fn), "permutation-default", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
632
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
633 # Regression path
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
634 # Linear family
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
635 linear_keys = [
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
636 "linearregression",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
637 "lasso",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
638 "ridge",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
639 "elasticnet",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
640 "lars",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
641 "lassolars",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
642 "orthogonalmatchingpursuit",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
643 "bayesianridge",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
644 "ardregression",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
645 "passiveaggressiveregressor",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
646 "theilsenregressor",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
647 "huberregressor",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
648 ]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
649 if any(k in lname for k in linear_keys):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
650 return shap.LinearExplainer(model, bg), "linear", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
651
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
652 # Kernel ridge / SVR / KNN / MLP / RANSAC (model-agnostic)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
653 if "kernelridge" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
654 return _permutation(predict_fn), "permutation-kernelridge", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
655 if "svr" in lname or "svm" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
656 kernel = getattr(model, "kernel", None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
657 if kernel == "linear":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
658 return shap.LinearExplainer(model, bg), "linear", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
659 return _permutation(predict_fn), "permutation-svr", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
660 if "kneighborsregressor" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
661 return _permutation(predict_fn), "permutation-knn", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
662 if "mlpregressor" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
663 return _permutation(predict_fn), "permutation-mlp", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
664 if "ransacregressor" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
665 return _permutation(predict_fn), "permutation-ransac", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
666
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
667 # Tree-based regressors
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
668 tree_class_names = [
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
669 "decisiontreeregressor",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
670 "randomforestregressor",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
671 "extratreesregressor",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
672 "adaboostregressor",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
673 "gradientboostingregressor",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
674 ]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
675 if any(k in lname for k in tree_class_names):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
676 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
677 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
678 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
679 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
680 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
681 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
682 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
683
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
684 # Boosting libraries
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
685 if "lgbmregressor" in lname or "lightgbm" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
686 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
687 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
688 model,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
689 bg,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
690 model_output="raw",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
691 feature_perturbation="tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
692 n_jobs=-1,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
693 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
694 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
695 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
696 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
697 if "xgbregressor" in lname or "xgboost" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
698 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
699 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
700 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
701 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
702 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
703 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
704 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
705 if "catboost" in lname:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
706 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
707 shap.TreeExplainer(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
708 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
709 ),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
710 "tree_path_dependent",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
711 True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
712 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
713
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
714 # Default regression: model-agnostic permutation explainer
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
715 return _permutation(predict_fn), "permutation-default", False
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
716
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
def run(self):
    """Execute the feature-importance workflow and return its report.

    Calls ``setup_pycaret()`` first when ``self.exp`` is ``None``, lacks an
    ``is_setup`` attribute, or has a falsy ``is_setup``. It then calls
    ``save_tree_importance()`` and ``save_shap_values()`` in that order.

    Returns:
        Whatever ``generate_html_report()`` returns.
    """
    # A missing experiment, a missing ``is_setup`` attribute and a falsy
    # ``is_setup`` all collapse to the same "needs setup" outcome.
    already_configured = getattr(self.exp, "is_setup", False)
    if not already_configured:
        self.setup_pycaret()

    self.save_tree_importance()
    self.save_shap_values()

    report = self.generate_html_report()
    return report