annotate feature_importance.py @ 12:e674b9e946fb draft default tip

planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
author goeckslab
date Mon, 08 Sep 2025 22:39:12 +0000
parents 4eca9d109de1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1 import base64
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
2 import logging
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
3 import os
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
4
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
5 import matplotlib.pyplot as plt
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
6 import pandas as pd
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
7 import shap
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
8 from pycaret.classification import ClassificationExperiment
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
9 from pycaret.regression import RegressionExperiment
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
10
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
11 logging.basicConfig(level=logging.DEBUG)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
12 LOG = logging.getLogger(__name__)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
13
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
14
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
15 class FeatureImportanceAnalyzer:
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
16 def __init__(
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
17 self,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
18 task_type,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
19 output_dir,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
20 data_path=None,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
21 data=None,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
22 target_col=None,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
23 exp=None,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
24 best_model=None,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
25 ):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
26 self.task_type = task_type
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
27 self.output_dir = output_dir
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
28 self.exp = exp
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
29 self.best_model = best_model
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
30
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
31 if exp is not None:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
32 # Assume all configs (data, target) are in exp
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
33 self.data = exp.dataset.copy()
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
34 self.target = exp.target_param
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
35 LOG.info("Using provided experiment object")
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
36 else:
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
37 if data is not None:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
38 self.data = data
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
39 LOG.info("Data loaded from memory")
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
40 else:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
41 self.target_col = target_col
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
42 self.data = pd.read_csv(data_path, sep=None, engine="python")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
43 self.data.columns = self.data.columns.str.replace(".", "_")
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
44 self.data = self.data.fillna(self.data.median(numeric_only=True))
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
45 self.target = self.data.columns[int(target_col) - 1]
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
46 self.exp = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
47 ClassificationExperiment()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
48 if task_type == "classification"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
49 else RegressionExperiment()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
50 )
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
51
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
52 self.plots = {}
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
53
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
54 def setup_pycaret(self):
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
55 if self.exp is not None and hasattr(self.exp, "is_setup") and self.exp.is_setup:
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
56 LOG.info("Experiment already set up. Skipping PyCaret setup.")
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
57 return
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
58 LOG.info("Initializing PyCaret")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
59 setup_params = {
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
60 "target": self.target,
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
61 "session_id": 123,
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
62 "html": True,
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
63 "log_experiment": False,
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
64 "system_log": False,
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
65 }
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
66 self.exp.setup(self.data, **setup_params)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
67
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
68 def save_tree_importance(self):
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
69 model = self.best_model or self.exp.get_config("best_model")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
70 processed_features = self.exp.get_config("X_transformed").columns
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
71
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
72 importances = None
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
73 model_type = model.__class__.__name__
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
74 self.tree_model_name = model_type
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
75
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
76 if hasattr(model, "feature_importances_"):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
77 importances = model.feature_importances_
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
78 elif hasattr(model, "coef_"):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
79 importances = abs(model.coef_).flatten()
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
80 else:
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
81 LOG.warning(
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
82 f"Model {model_type} does not have feature_importances_ or coef_. Skipping tree importance."
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
83 )
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
84 self.tree_model_name = None
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
85 return
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
86
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
87 if len(importances) != len(processed_features):
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
88 LOG.warning(
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
89 f"Importances ({len(importances)}) != features ({len(processed_features)}). Skipping tree importance."
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
90 )
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
91 self.tree_model_name = None
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
92 return
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
93
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
94 feature_importances = pd.DataFrame(
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
95 {"Feature": processed_features, "Importance": importances}
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
96 ).sort_values(by="Importance", ascending=False)
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
97 plt.figure(figsize=(10, 6))
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
98 plt.barh(feature_importances["Feature"], feature_importances["Importance"])
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
99 plt.xlabel("Importance")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
100 plt.title(f"Feature Importance ({model_type})")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
101 plot_path = os.path.join(self.output_dir, "tree_importance.png")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
102 plt.savefig(plot_path, bbox_inches="tight")
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
103 plt.close()
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
104 self.plots["tree_importance"] = plot_path
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
105
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
106 def save_shap_values(self, max_samples=None, max_display=None, max_features=None):
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
107 model = self.best_model or self.exp.get_config("best_model")
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
108
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
109 X_data = None
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
110 for key in ("X_test_transformed", "X_train_transformed"):
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
111 try:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
112 X_data = self.exp.get_config(key)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
113 break
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
114 except KeyError:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
115 continue
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
116 if X_data is None:
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
117 raise RuntimeError("No transformed dataset found for SHAP.")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
118
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
119 # --- Adaptive feature limiting (proportional cap) ---
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
120 n_rows, n_features = X_data.shape
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
121 if max_features is None:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
122 if n_features <= 200:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
123 max_features = n_features
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
124 else:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
125 max_features = min(200, max(20, int(n_features * 0.1)))
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
126
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
127 try:
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
128 if hasattr(model, "feature_importances_"):
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
129 importances = pd.Series(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
130 model.feature_importances_, index=X_data.columns
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
131 )
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
132 top_features = importances.nlargest(max_features).index
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
133 elif hasattr(model, "coef_"):
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
134 coef = abs(model.coef_).flatten()
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
135 importances = pd.Series(coef, index=X_data.columns)
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
136 top_features = importances.nlargest(max_features).index
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
137 else:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
138 variances = X_data.var()
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
139 top_features = variances.nlargest(max_features).index
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
140
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
141 if len(top_features) < n_features:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
142 LOG.info(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
143 f"Restricted SHAP computation to top {len(top_features)} / {n_features} features"
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
144 )
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
145 X_data = X_data[top_features]
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
146 except Exception as e:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
147 LOG.warning(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
148 f"Feature limiting failed: {e}. Using all {n_features} features."
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
149 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
150
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
151 # --- Adaptive row subsampling ---
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
152 if max_samples is None:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
153 if n_rows <= 500:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
154 max_samples = n_rows
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
155 elif n_rows <= 5000:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
156 max_samples = 500
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
157 else:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
158 max_samples = min(1000, int(n_rows * 0.1))
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
159
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
160 if n_rows > max_samples:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
161 LOG.info(f"Subsampling SHAP rows: {max_samples} of {n_rows}")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
162 X_data = X_data.sample(max_samples, random_state=42)
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
163
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
164 # --- Adaptive feature display ---
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
165 if max_display is None:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
166 if X_data.shape[1] <= 20:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
167 max_display = X_data.shape[1]
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
168 elif X_data.shape[1] <= 100:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
169 max_display = 30
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
170 else:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
171 max_display = 50
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
172
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
173 # Background set
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
174 bg = X_data.sample(min(len(X_data), 100), random_state=42)
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
175 predict_fn = (
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
176 model.predict_proba if hasattr(model, "predict_proba") else model.predict
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
177 )
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
178
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
179 # Optimized explainer
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
180 if hasattr(model, "feature_importances_"):
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
181 explainer = shap.TreeExplainer(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
182 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
183 )
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
184 elif hasattr(model, "coef_"):
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
185 explainer = shap.LinearExplainer(model, bg)
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
186 else:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
187 explainer = shap.Explainer(predict_fn, bg)
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
188
11
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
189 try:
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
190 shap_values = explainer(X_data)
11
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
191 self.shap_model_name = explainer.__class__.__name__
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
192 except Exception as e:
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
193 LOG.error(f"SHAP computation failed: {e}")
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
194 self.shap_model_name = None
4eca9d109de1 planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 8
diff changeset
195 return
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
196
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
197 # --- Plot SHAP summary ---
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
198 out_path = os.path.join(self.output_dir, "shap_summary.png")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
199 plt.figure()
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
200 shap.plots.beeswarm(shap_values, max_display=max_display, show=False)
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
201 plt.title(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
202 f"SHAP Summary for {model.__class__.__name__} (top {max_display} features)"
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
203 )
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
204 plt.savefig(out_path, bbox_inches="tight")
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
205 plt.close()
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
206 self.plots["shap_summary"] = out_path
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
207
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
208 # --- Log summary ---
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
209 LOG.info(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
210 f"SHAP summary completed with {X_data.shape[0]} rows and {X_data.shape[1]} features (displaying top {max_display})."
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
211 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
212
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
213 def generate_html_report(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
214 LOG.info("Generating HTML report")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
215 plots_html = ""
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
216 for plot_name, plot_path in self.plots.items():
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
217 if plot_name == "tree_importance" and not getattr(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
218 self, "tree_model_name", None
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
219 ):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
220 continue
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
221 encoded_image = self.encode_image_to_base64(plot_path)
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
222 if plot_name == "tree_importance" and getattr(
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
223 self, "tree_model_name", None
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
224 ):
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
225 section_title = f"Feature importance from {self.tree_model_name}"
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
226 elif plot_name == "shap_summary":
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
227 section_title = (
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
228 f"SHAP Summary from {getattr(self, 'shap_model_name', 'model')}"
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
229 )
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
230 else:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
231 section_title = plot_name
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
232 plots_html += f"""
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
233 <div class="plot" id="{plot_name}">
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
234 <h2>{section_title}</h2>
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
235 <img src="data:image/png;base64,{encoded_image}" alt="{plot_name}">
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
236 </div>
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
237 """
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
238 return f"{plots_html}"
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
239
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
240 def encode_image_to_base64(self, img_path):
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
241 with open(img_path, "rb") as img_file:
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
242 return base64.b64encode(img_file.read()).decode("utf-8")
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
243
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
244 def run(self):
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
245 if (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
246 self.exp is None
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
247 or not hasattr(self.exp, "is_setup")
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
248 or not self.exp.is_setup
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
249 ):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
250 self.setup_pycaret()
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
251 self.save_tree_importance()
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
252 self.save_shap_values()
12
e674b9e946fb planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 11
diff changeset
253 return self.generate_html_report()