Mercurial > repos > goeckslab > tabular_learner
annotate feature_importance.py @ 8:ba45bc057d70 draft default tip
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
author | goeckslab |
---|---|
date | Mon, 08 Sep 2025 22:38:55 +0000 |
parents | 0afd970bd8ae |
children |
rev | line source |
---|---|
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
1 import base64 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
2 import logging |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
3 import os |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
4 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
5 import matplotlib.pyplot as plt |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
6 import pandas as pd |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
7 import shap |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
8 from pycaret.classification import ClassificationExperiment |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
9 from pycaret.regression import RegressionExperiment |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
10 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
# Module-scoped logger shared by the code in this file; named after the
# importing module so emitted records can be traced back to it.
LOG = logging.getLogger(__name__)

# Request DEBUG-level configuration of the root logger as soon as the module
# is imported.
logging.basicConfig(level=logging.DEBUG)
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
13 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
14 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
15 class FeatureImportanceAnalyzer: |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
def __init__(
    self,
    task_type,
    output_dir,
    data_path=None,
    data=None,
    target_col=None,
    exp=None,
    best_model=None,
):
    """Store the data, target column and experiment used for the analysis.

    Exactly one data source is used, in this order of precedence:
    ``exp`` (its ``dataset`` and ``target_param``), then ``data``
    (used as given), then ``data_path`` (read with pandas, dots in column
    names replaced by underscores, numeric gaps filled with the column
    median).

    Args:
        task_type: ``"classification"`` creates a ClassificationExperiment;
            any other value creates a RegressionExperiment. Only consulted
            when ``exp`` is not supplied.
        output_dir: Directory path kept on the instance for plot output.
        data_path: Path of a delimited text file; the separator is sniffed
            by pandas (``sep=None`` with the python engine).
        data: Already-loaded table, stored without modification.
        target_col: 1-based position of the target column. Required when
            ``exp`` is not supplied.
        exp: Existing experiment object to reuse.
        best_model: Optional model kept on the instance for later methods.

    Raises:
        ValueError: If none of ``exp``, ``data`` or ``data_path`` is given,
            or ``target_col`` cannot be converted to an integer.
        TypeError: If ``target_col`` is missing when it is required.
        IndexError: If ``target_col`` does not address an existing column.
    """
    self.task_type = task_type
    self.output_dir = output_dir
    self.exp = exp
    self.best_model = best_model
    # Always defined, so later attribute access does not depend on which
    # branch below was taken.
    self.target_col = target_col
    self.plots = {}

    if exp is not None:
        # Assume all configs (data, target) are in exp
        self.data = exp.dataset.copy()
        self.target = exp.target_param
        LOG.info("Using provided experiment object")
        return

    if data is not None:
        self.data = data
        LOG.info("Data loaded from memory")
    else:
        if data_path is None:
            raise ValueError(
                "One of exp, data or data_path must be provided."
            )
        self.data = pd.read_csv(data_path, sep=None, engine="python")
        # regex=False: "." must be treated as a literal dot, not as the
        # regular-expression wildcard, regardless of the pandas default.
        self.data.columns = self.data.columns.str.replace(
            ".", "_", regex=False
        )
        self.data = self.data.fillna(self.data.median(numeric_only=True))

    if target_col is None:
        raise TypeError(
            "target_col is required when no experiment object is provided."
        )
    position = int(target_col) - 1
    # Reject 0 and negatives explicitly: they would otherwise wrap around
    # and silently select a column counted from the end.
    if not 0 <= position < len(self.data.columns):
        raise IndexError(
            f"target_col {target_col} is out of range for "
            f"{len(self.data.columns)} columns (1-based)."
        )
    self.target = self.data.columns[position]
    self.exp = (
        ClassificationExperiment()
        if task_type == "classification"
        else RegressionExperiment()
    )
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
53 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
def setup_pycaret(self):
    """Run the experiment's ``setup`` on the stored data, once.

    When the experiment object exposes a truthy ``is_setup`` attribute the
    call is skipped; otherwise ``setup`` is invoked with the stored target
    and a fixed session id (123), HTML output enabled, and experiment and
    system logging disabled.
    """
    experiment = self.exp
    already_configured = (
        experiment is not None
        and hasattr(experiment, "is_setup")
        and experiment.is_setup
    )
    if already_configured:
        LOG.info("Experiment already set up. Skipping PyCaret setup.")
        return

    LOG.info("Initializing PyCaret")
    self.exp.setup(
        self.data,
        target=self.target,
        session_id=123,
        html=True,
        log_experiment=False,
        system_log=False,
    )
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
67 |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
def save_tree_importance(self):
    """Plot the model's built-in feature importances as a horizontal bar chart.

    The model is ``self.best_model`` when set, otherwise the experiment's
    ``"best_model"`` config entry. Importances come from
    ``feature_importances_`` if present, else from the absolute values of
    ``coef_`` (flattened). The chart is written to
    ``<output_dir>/tree_importance.png`` and its path is recorded in
    ``self.plots["tree_importance"]``.

    ``self.tree_model_name`` is set to the model's class name, or to
    ``None`` when the plot is skipped because the model exposes neither
    attribute or the number of importances differs from the number of
    transformed feature columns.
    """
    # Explicit None check: truthiness on an estimator may invoke __len__
    # and must not decide whether a supplied model is used.
    if self.best_model is not None:
        model = self.best_model
    else:
        model = self.exp.get_config("best_model")
    processed_features = self.exp.get_config("X_transformed").columns

    model_type = model.__class__.__name__
    self.tree_model_name = model_type

    if hasattr(model, "feature_importances_"):
        importances = model.feature_importances_
    elif hasattr(model, "coef_"):
        # NOTE(review): a 2-D coef_ flattens to more entries than there are
        # features and is then skipped by the length check below.
        importances = abs(model.coef_).flatten()
    else:
        LOG.warning(
            "Model %s does not have feature_importances_ or coef_. "
            "Skipping tree importance.",
            model_type,
        )
        self.tree_model_name = None
        return

    if len(importances) != len(processed_features):
        LOG.warning(
            "Importances (%s) != features (%s). Skipping tree importance.",
            len(importances),
            len(processed_features),
        )
        self.tree_model_name = None
        return

    feature_importances = pd.DataFrame(
        {"Feature": processed_features, "Importance": importances}
    ).sort_values(by="Importance", ascending=False)

    plot_path = os.path.join(self.output_dir, "tree_importance.png")
    fig = plt.figure(figsize=(10, 6))
    try:
        plt.barh(feature_importances["Feature"], feature_importances["Importance"])
        plt.xlabel("Importance")
        plt.title(f"Feature Importance ({model_type})")
        plt.savefig(plot_path, bbox_inches="tight")
    finally:
        # Release the figure even when drawing or saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)
    self.plots["tree_importance"] = plot_path
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
105 |
8
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
106 def save_shap_values(self, max_samples=None, max_display=None, max_features=None): |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
107 model = self.best_model or self.exp.get_config("best_model") |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
108 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
109 X_data = None |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
110 for key in ("X_test_transformed", "X_train_transformed"): |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
111 try: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
112 X_data = self.exp.get_config(key) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
113 break |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
114 except KeyError: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
115 continue |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
116 if X_data is None: |
8
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
117 raise RuntimeError("No transformed dataset found for SHAP.") |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
118 |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
119 # --- Adaptive feature limiting (proportional cap) --- |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
120 n_rows, n_features = X_data.shape |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
121 if max_features is None: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
122 if n_features <= 200: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
123 max_features = n_features |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
124 else: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
125 max_features = min(200, max(20, int(n_features * 0.1))) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
126 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
127 try: |
8
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
128 if hasattr(model, "feature_importances_"): |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
129 importances = pd.Series( |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
130 model.feature_importances_, index=X_data.columns |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
131 ) |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
132 top_features = importances.nlargest(max_features).index |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
133 elif hasattr(model, "coef_"): |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
134 coef = abs(model.coef_).flatten() |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
135 importances = pd.Series(coef, index=X_data.columns) |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
136 top_features = importances.nlargest(max_features).index |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
137 else: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
138 variances = X_data.var() |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
139 top_features = variances.nlargest(max_features).index |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
140 |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
141 if len(top_features) < n_features: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
142 LOG.info( |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
143 f"Restricted SHAP computation to top {len(top_features)} / {n_features} features" |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
144 ) |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
145 X_data = X_data[top_features] |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
146 except Exception as e: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
147 LOG.warning( |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
148 f"Feature limiting failed: {e}. Using all {n_features} features." |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
149 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
150 |
8
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
151 # --- Adaptive row subsampling --- |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
152 if max_samples is None: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
153 if n_rows <= 500: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
154 max_samples = n_rows |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
155 elif n_rows <= 5000: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
156 max_samples = 500 |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
157 else: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
158 max_samples = min(1000, int(n_rows * 0.1)) |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
159 |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
160 if n_rows > max_samples: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
161 LOG.info(f"Subsampling SHAP rows: {max_samples} of {n_rows}") |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
162 X_data = X_data.sample(max_samples, random_state=42) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
163 |
8
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
164 # --- Adaptive feature display --- |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
165 if max_display is None: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
166 if X_data.shape[1] <= 20: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
167 max_display = X_data.shape[1] |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
168 elif X_data.shape[1] <= 100: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
169 max_display = 30 |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
170 else: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
171 max_display = 50 |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
172 |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
173 # Background set |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
174 bg = X_data.sample(min(len(X_data), 100), random_state=42) |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
175 predict_fn = ( |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
176 model.predict_proba if hasattr(model, "predict_proba") else model.predict |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
177 ) |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
178 |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
179 # Optimized explainer |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
180 if hasattr(model, "feature_importances_"): |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
181 explainer = shap.TreeExplainer( |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
182 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1 |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
183 ) |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
184 elif hasattr(model, "coef_"): |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
185 explainer = shap.LinearExplainer(model, bg) |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
186 else: |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
187 explainer = shap.Explainer(predict_fn, bg) |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
188 |
7
0afd970bd8ae
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
4
diff
changeset
|
189 try: |
8
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
190 shap_values = explainer(X_data) |
7
0afd970bd8ae
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
4
diff
changeset
|
191 self.shap_model_name = explainer.__class__.__name__ |
0afd970bd8ae
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
4
diff
changeset
|
192 except Exception as e: |
0afd970bd8ae
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
4
diff
changeset
|
193 LOG.error(f"SHAP computation failed: {e}") |
0afd970bd8ae
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
4
diff
changeset
|
194 self.shap_model_name = None |
0afd970bd8ae
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
4
diff
changeset
|
195 return |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
196 |
8
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
197 # --- Plot SHAP summary --- |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
198 out_path = os.path.join(self.output_dir, "shap_summary.png") |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
199 plt.figure() |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
200 shap.plots.beeswarm(shap_values, max_display=max_display, show=False) |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
201 plt.title( |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
202 f"SHAP Summary for {model.__class__.__name__} (top {max_display} features)" |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
203 ) |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
204 plt.savefig(out_path, bbox_inches="tight") |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
205 plt.close() |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
206 self.plots["shap_summary"] = out_path |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
207 |
8
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
208 # --- Log summary --- |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
209 LOG.info( |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
210 f"SHAP summary completed with {X_data.shape[0]} rows and {X_data.shape[1]} features (displaying top {max_display})." |
ba45bc057d70
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
7
diff
changeset
|
211 ) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
212 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
def generate_html_report(self):
    """Build the HTML fragment that embeds every saved plot as an inline PNG.

    Walks ``self.plots`` (plot name -> image path) in insertion order,
    base64-encodes each image through ``self.encode_image_to_base64`` and
    wraps it in a ``<div class="plot">`` section with an ``<h2>`` heading.

    Heading rules:
        * ``tree_importance`` -> ``"Feature importance from <tree_model_name>"``;
          the section is left out entirely when ``self.tree_model_name`` is
          missing or falsy.
        * ``shap_summary`` -> ``"SHAP Summary from <shap_model_name>"``,
          falling back to ``"model"`` when the name is missing or ``None``.
        * anything else -> the plot name itself.

    Returns:
        str: The concatenated HTML sections; an empty string when no section
        is rendered.
    """
    LOG.info("Generating HTML report")
    sections = []
    for plot_name, plot_path in self.plots.items():
        if plot_name == "tree_importance":
            tree_model_name = getattr(self, "tree_model_name", None)
            if not tree_model_name:
                # No tree model name recorded: skip before touching the
                # image file at all.
                continue
            section_title = f"Feature importance from {tree_model_name}"
        elif plot_name == "shap_summary":
            # ``or`` (rather than a getattr default) also covers the
            # attribute being present but set to None, which would otherwise
            # render as "SHAP Summary from None".
            shap_model_name = getattr(self, "shap_model_name", None) or "model"
            section_title = f"SHAP Summary from {shap_model_name}"
        else:
            section_title = plot_name

        encoded_image = self.encode_image_to_base64(plot_path)
        sections.append(f"""
        <div class="plot" id="{plot_name}">
            <h2>{section_title}</h2>
            <img src="data:image/png;base64,{encoded_image}" alt="{plot_name}">
        </div>
        """)
    # Join once instead of growing a string inside the loop.
    return "".join(sections)
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
239 |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
def encode_image_to_base64(self, img_path):
    """Return the bytes of the file at ``img_path`` as base64 text.

    The file is opened in binary mode, read in full, base64-encoded and the
    result decoded as UTF-8 so it can be embedded in a string.
    """
    with open(img_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
243 |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
def run(self):
    """Run the full feature-importance pipeline and return the HTML report.

    Calls ``self.setup_pycaret()`` first when ``self.exp`` is ``None``, has
    no ``is_setup`` attribute, or reports a falsy ``is_setup``. Then saves
    the tree importance plot and the SHAP values, and returns whatever
    ``self.generate_html_report()`` produces.
    """
    experiment = self.exp
    # A missing attribute is treated the same as "not set up".
    if experiment is None or not getattr(experiment, "is_setup", False):
        self.setup_pycaret()

    self.save_tree_importance()
    self.save_shap_values()
    report_html = self.generate_html_report()
    return report_html