Mercurial > repos > goeckslab > pycaret_predict
annotate feature_importance.py @ 12:e674b9e946fb draft default tip
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
author | goeckslab |
---|---|
date | Mon, 08 Sep 2025 22:39:12 +0000 |
parents | 4eca9d109de1 |
children |
rev | line source |
---|---|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
1 import base64 |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
2 import logging |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
3 import os |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
4 |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
5 import matplotlib.pyplot as plt |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
6 import pandas as pd |
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
7 import shap |
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
8 from pycaret.classification import ClassificationExperiment |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
9 from pycaret.regression import RegressionExperiment |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
10 |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
# Configure logging at DEBUG level as a side effect of importing this module.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger, named after this module, shared by the code below.
LOG = logging.getLogger(__name__)
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
13 |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
14 |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
15 class FeatureImportanceAnalyzer: |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def __init__(
    self,
    task_type,
    output_dir,
    data_path=None,
    data=None,
    target_col=None,
    exp=None,
    best_model=None,
):
    """Gather the data, target column and experiment used for importance plots.

    Two modes are supported:

    * ``exp`` is given: the data is a copy of ``exp.dataset`` and the target
      is ``exp.target_param``; ``data_path``, ``data`` and ``target_col`` are
      not used to build the dataset.
    * ``exp`` is ``None``: the data is ``data`` (used as passed) or, failing
      that, the delimited file at ``data_path``; the target is looked up by
      position and a fresh experiment object is created.

    Args:
        task_type: ``"classification"`` selects ``ClassificationExperiment``;
            any other value selects ``RegressionExperiment``.
        output_dir: Directory that generated plots are written to.
        data_path: Path of a delimited text file; the separator is sniffed
            by pandas. Only read when both ``exp`` and ``data`` are ``None``.
        data: Already-loaded table exposing ``.columns``.
        target_col: 1-based position of the target column. Required when
            ``exp`` is ``None``; must be convertible with ``int()``.
        exp: Existing experiment object to reuse.
        best_model: Model to analyse instead of the experiment's own.

    Raises:
        IndexError: If ``target_col`` is outside ``1..number_of_columns``.
    """
    self.task_type = task_type
    self.output_dir = output_dir
    self.exp = exp
    self.best_model = best_model
    # Recorded on every path so the attribute always exists; it used to be
    # assigned only when the data was read from ``data_path``.
    self.target_col = target_col
    self.plots = {}

    if exp is not None:
        # Assume all configs (data, target) are in exp
        self.data = exp.dataset.copy()
        self.target = exp.target_param
        LOG.info("Using provided experiment object")
        return

    if data is not None:
        self.data = data
        LOG.info("Data loaded from memory")
    else:
        frame = pd.read_csv(data_path, sep=None, engine="python")
        # regex=False: "." must be a literal dot. The default of ``regex``
        # differs between pandas versions, so relying on it is fragile.
        frame.columns = frame.columns.str.replace(".", "_", regex=False)
        # Fill gaps in numeric columns with that column's median.
        self.data = frame.fillna(frame.median(numeric_only=True))

    # ``target_col`` is 1-based. Without the range check a value of 0 (or a
    # negative one) would silently wrap around to a column counted from the
    # end instead of failing.
    position = int(target_col)
    n_columns = len(self.data.columns)
    if not 1 <= position <= n_columns:
        raise IndexError(
            f"target_col {position} is out of range; expected a value "
            f"between 1 and {n_columns}"
        )
    self.target = self.data.columns[position - 1]

    self.exp = (
        ClassificationExperiment()
        if task_type == "classification"
        else RegressionExperiment()
    )
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
53 |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def setup_pycaret(self):
    """Call ``setup`` on the experiment unless it reports being set up already.

    The call is skipped when ``self.exp`` has a truthy ``is_setup``
    attribute. Otherwise ``self.exp.setup`` receives ``self.data`` with
    ``self.target`` as the target, a fixed ``session_id`` of 123, ``html``
    enabled, and ``log_experiment`` / ``system_log`` disabled.
    """
    # getattr with a default covers both "no experiment object" and
    # "experiment object without an is_setup attribute".
    already_configured = getattr(self.exp, "is_setup", False)
    if already_configured:
        LOG.info("Experiment already set up. Skipping PyCaret setup.")
        return

    LOG.info("Initializing PyCaret")
    pycaret_options = dict(
        target=self.target,
        session_id=123,
        html=True,
        log_experiment=False,
        system_log=False,
    )
    self.exp.setup(self.data, **pycaret_options)
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
67 |
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
def save_tree_importance(self):
    """Plot the model's built-in feature importances and save them as a PNG.

    The model is ``self.best_model`` or, when that is ``None``, the
    experiment's ``"best_model"`` config entry. Importances come from
    ``feature_importances_`` if present, otherwise from the absolute values
    of ``coef_``. Feature names are the columns of the experiment's
    ``"X_transformed"`` config entry.

    On success the bar chart is written to ``tree_importance.png`` inside
    ``self.output_dir``, its path is stored in
    ``self.plots["tree_importance"]`` and ``self.tree_model_name`` holds the
    model's class name. If the model exposes neither attribute, or the
    number of importances does not match the number of features, a warning
    is logged, ``self.tree_model_name`` is set to ``None`` and nothing is
    plotted.
    """
    # Explicit None test: an estimator may define __len__/__bool__, so a
    # plain ``or`` could discard (or fail on) a perfectly valid model.
    model = self.best_model
    if model is None:
        model = self.exp.get_config("best_model")
    processed_features = self.exp.get_config("X_transformed").columns

    model_type = model.__class__.__name__
    self.tree_model_name = model_type

    if hasattr(model, "feature_importances_"):
        importances = model.feature_importances_
    elif hasattr(model, "coef_"):
        weights = abs(model.coef_)
        # A 2-D coef_ holds one row per class/output. Flattening it as-is
        # yields rows * features values, which can never line up with the
        # feature names, so average the rows into one value per feature.
        # A single-row coef_ is unchanged by the average.
        if getattr(weights, "ndim", 1) > 1:
            weights = weights.mean(axis=0)
        importances = weights.flatten()
    else:
        LOG.warning(
            "Model %s does not have feature_importances_ or coef_. "
            "Skipping tree importance.",
            model_type,
        )
        self.tree_model_name = None
        return

    if len(importances) != len(processed_features):
        LOG.warning(
            "Importances (%d) != features (%d). Skipping tree importance.",
            len(importances),
            len(processed_features),
        )
        self.tree_model_name = None
        return

    feature_importances = pd.DataFrame(
        {"Feature": processed_features, "Importance": importances}
    ).sort_values(by="Importance", ascending=False)

    plot_path = os.path.join(self.output_dir, "tree_importance.png")
    fig = plt.figure(figsize=(10, 6))
    try:
        plt.barh(feature_importances["Feature"], feature_importances["Importance"])
        plt.xlabel("Importance")
        plt.title(f"Feature Importance ({model_type})")
        plt.savefig(plot_path, bbox_inches="tight")
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    # Registered only after the file has actually been written.
    self.plots["tree_importance"] = plot_path
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
105 |
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
106 def save_shap_values(self, max_samples=None, max_display=None, max_features=None): |
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
107 model = self.best_model or self.exp.get_config("best_model") |
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
108 |
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
109 X_data = None |
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
110 for key in ("X_test_transformed", "X_train_transformed"): |
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
111 try: |
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
112 X_data = self.exp.get_config(key) |
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
113 break |
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
114 except KeyError: |
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
115 continue |
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
116 if X_data is None: |
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
117 raise RuntimeError("No transformed dataset found for SHAP.") |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
118 |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
119 # --- Adaptive feature limiting (proportional cap) --- |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
120 n_rows, n_features = X_data.shape |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
121 if max_features is None: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
122 if n_features <= 200: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
123 max_features = n_features |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
124 else: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
125 max_features = min(200, max(20, int(n_features * 0.1))) |
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
126 |
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
127 try: |
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
128 if hasattr(model, "feature_importances_"): |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
129 importances = pd.Series( |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
130 model.feature_importances_, index=X_data.columns |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
131 ) |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
132 top_features = importances.nlargest(max_features).index |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
133 elif hasattr(model, "coef_"): |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
134 coef = abs(model.coef_).flatten() |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
135 importances = pd.Series(coef, index=X_data.columns) |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
136 top_features = importances.nlargest(max_features).index |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
137 else: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
138 variances = X_data.var() |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
139 top_features = variances.nlargest(max_features).index |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
140 |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
141 if len(top_features) < n_features: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
142 LOG.info( |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
143 f"Restricted SHAP computation to top {len(top_features)} / {n_features} features" |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
144 ) |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
145 X_data = X_data[top_features] |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
146 except Exception as e: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
147 LOG.warning( |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
148 f"Feature limiting failed: {e}. Using all {n_features} features." |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
149 ) |
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
150 |
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
151 # --- Adaptive row subsampling --- |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
152 if max_samples is None: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
153 if n_rows <= 500: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
154 max_samples = n_rows |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
155 elif n_rows <= 5000: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
156 max_samples = 500 |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
157 else: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
158 max_samples = min(1000, int(n_rows * 0.1)) |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
159 |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
160 if n_rows > max_samples: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
161 LOG.info(f"Subsampling SHAP rows: {max_samples} of {n_rows}") |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
162 X_data = X_data.sample(max_samples, random_state=42) |
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
163 |
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
164 # --- Adaptive feature display --- |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
165 if max_display is None: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
166 if X_data.shape[1] <= 20: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
167 max_display = X_data.shape[1] |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
168 elif X_data.shape[1] <= 100: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
169 max_display = 30 |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
170 else: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
171 max_display = 50 |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
172 |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
173 # Background set |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
174 bg = X_data.sample(min(len(X_data), 100), random_state=42) |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
175 predict_fn = ( |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
176 model.predict_proba if hasattr(model, "predict_proba") else model.predict |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
177 ) |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
178 |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
179 # Optimized explainer |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
180 if hasattr(model, "feature_importances_"): |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
181 explainer = shap.TreeExplainer( |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
182 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1 |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
183 ) |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
184 elif hasattr(model, "coef_"): |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
185 explainer = shap.LinearExplainer(model, bg) |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
186 else: |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
187 explainer = shap.Explainer(predict_fn, bg) |
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
188 |
11
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
189 try: |
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
190 shap_values = explainer(X_data) |
11
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
191 self.shap_model_name = explainer.__class__.__name__ |
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
192 except Exception as e: |
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
193 LOG.error(f"SHAP computation failed: {e}") |
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
194 self.shap_model_name = None |
4eca9d109de1
planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents:
8
diff
changeset
|
195 return |
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
196 |
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
197 # --- Plot SHAP summary --- |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
198 out_path = os.path.join(self.output_dir, "shap_summary.png") |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
199 plt.figure() |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
200 shap.plots.beeswarm(shap_values, max_display=max_display, show=False) |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
201 plt.title( |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
202 f"SHAP Summary for {model.__class__.__name__} (top {max_display} features)" |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
203 ) |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
204 plt.savefig(out_path, bbox_inches="tight") |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
205 plt.close() |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
206 self.plots["shap_summary"] = out_path |
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
207 |
12
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
208 # --- Log summary --- |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
209 LOG.info( |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
210 f"SHAP summary completed with {X_data.shape[0]} rows and {X_data.shape[1]} features (displaying top {max_display})." |
e674b9e946fb
planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
11
diff
changeset
|
211 ) |
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
212 |
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def generate_html_report(self):
    """Build the HTML fragment that embeds every saved plot as an inline PNG.

    Walks ``self.plots`` (plot name -> image path), base64-encodes each image
    through ``self.encode_image_to_base64`` and wraps it in a
    ``<div class="plot">`` section with a heading:

    * ``tree_importance`` is titled with ``self.tree_model_name`` and is
      skipped entirely when that attribute is missing or falsy;
    * ``shap_summary`` is titled with ``self.shap_model_name``, falling back
      to ``"model"`` when the name is missing or ``None``;
    * any other plot uses its own name as the title.

    Returns:
        str: The concatenated ``<div>`` sections, or an empty string when no
        plot is rendered.
    """
    LOG.info("Generating HTML report")

    tree_model_name = getattr(self, "tree_model_name", None)
    # getattr's default only applies when the attribute is absent; the SHAP
    # step assigns ``shap_model_name = None`` on failure, so guard against a
    # present-but-None value as well to avoid a "from None" heading.
    shap_model_name = getattr(self, "shap_model_name", None) or "model"

    sections = []
    for plot_name, plot_path in self.plots.items():
        if plot_name == "tree_importance":
            if not tree_model_name:
                # No tree model was recorded: leave this plot out without
                # reading the image file.
                continue
            section_title = f"Feature importance from {tree_model_name}"
        elif plot_name == "shap_summary":
            section_title = f"SHAP Summary from {shap_model_name}"
        else:
            section_title = plot_name

        encoded_image = self.encode_image_to_base64(plot_path)
        sections.append(
            f"""
            <div class="plot" id="{plot_name}">
                <h2>{section_title}</h2>
                <img src="data:image/png;base64,{encoded_image}" alt="{plot_name}">
            </div>
            """
        )
    return "".join(sections)
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
239 |
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
def encode_image_to_base64(self, img_path):
    """Return the contents of the file at ``img_path`` as base64 text.

    The file is read in binary mode and the base64 output is decoded to a
    ``str`` so it can be embedded directly in a ``data:`` URI.
    """
    with open(img_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
243 |
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
def run(self):
    """Run the feature-importance pipeline and return the HTML report.

    ``self.setup_pycaret()`` is called first unless ``self.exp`` is set,
    exposes an ``is_setup`` attribute and that attribute is truthy. The tree
    importance and SHAP steps then run in that order, and the value returned
    by ``self.generate_html_report()`` is passed back to the caller.
    """
    experiment_ready = (
        self.exp is not None
        and hasattr(self.exp, "is_setup")
        and self.exp.is_setup
    )
    if not experiment_ready:
        self.setup_pycaret()

    self.save_tree_importance()
    self.save_shap_values()
    report_html = self.generate_html_report()
    return report_html