annotate feature_importance.py @ 8:ba45bc057d70 draft default tip

planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
author goeckslab
date Mon, 08 Sep 2025 22:38:55 +0000
parents 0afd970bd8ae
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1 import base64
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
2 import logging
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
3 import os
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
4
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
5 import matplotlib.pyplot as plt
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
6 import pandas as pd
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
7 import shap
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
8 from pycaret.classification import ClassificationExperiment
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
9 from pycaret.regression import RegressionExperiment
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
10
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
# Configure the root logger when this module is imported so that records at
# DEBUG level and above are emitted.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger, named after this module, used by the analyzer below.
LOG = logging.getLogger(__name__)
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
13
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
14
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
15 class FeatureImportanceAnalyzer:
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
def __init__(
    self,
    task_type,
    output_dir,
    data_path=None,
    data=None,
    target_col=None,
    exp=None,
    best_model=None,
):
    """Prepare the analyzer from an experiment, a DataFrame, or a data file.

    Exactly one data source is used, in this order of precedence:
    ``exp``, then ``data``, then ``data_path``.

    Args:
        task_type: ``"classification"`` selects a ClassificationExperiment;
            any other value selects a RegressionExperiment. Only consulted
            when ``exp`` is not supplied.
        output_dir: Directory in which plots are later written.
        data_path: Path of a delimited text file. Read (with delimiter
            sniffing) only when neither ``exp`` nor ``data`` is given.
        data: In-memory DataFrame, stored as-is (not copied).
        target_col: 1-based position of the target column. Required
            whenever ``exp`` is not supplied.
        exp: Existing experiment object; its ``dataset`` is copied and its
            ``target_param`` is used as the target name.
        best_model: Optional model, stored for the importance methods.

    Raises:
        ValueError: If none of ``exp``, ``data`` and ``data_path`` is given.
    """
    self.task_type = task_type
    self.output_dir = output_dir
    self.exp = exp
    self.best_model = best_model
    # Always defined, so the attribute exists whichever data source is used.
    self.target_col = target_col
    self.plots = {}

    if exp is not None:
        # Assume all configs (data, target) are in exp.
        self.data = exp.dataset.copy()
        self.target = exp.target_param
        LOG.info("Using provided experiment object")
        return

    if data is not None:
        self.data = data
        LOG.info("Data loaded from memory")
    else:
        if data_path is None:
            raise ValueError(
                "One of 'exp', 'data' or 'data_path' must be provided."
            )
        frame = pd.read_csv(data_path, sep=None, engine="python")
        # regex=False: "." must be treated literally. With the regex default
        # of older pandas releases it is a wildcard and would turn every
        # character of every column name into "_".
        frame.columns = frame.columns.str.replace(".", "_", regex=False)
        # Impute missing numeric values with the per-column median.
        self.data = frame.fillna(frame.median(numeric_only=True))

    # target_col is 1-based; convert to a 0-based column position.
    self.target = self.data.columns[int(target_col) - 1]
    self.exp = (
        ClassificationExperiment()
        if task_type == "classification"
        else RegressionExperiment()
    )
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
53
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
def setup_pycaret(self):
    """Run PyCaret ``setup`` on the stored data, unless already done.

    An experiment that exposes a truthy ``is_setup`` attribute is treated as
    configured and left untouched. Otherwise ``setup`` is called with the
    stored data and target, a fixed session id, and experiment/system
    logging turned off.
    """
    # getattr with a default covers both "exp is None" and "no is_setup".
    already_configured = getattr(self.exp, "is_setup", False)
    if already_configured:
        LOG.info("Experiment already set up. Skipping PyCaret setup.")
        return

    LOG.info("Initializing PyCaret")
    self.exp.setup(
        self.data,
        target=self.target,
        session_id=123,
        html=True,
        log_experiment=False,
        system_log=False,
    )
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
67
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
def save_tree_importance(self):
    """Plot the model's built-in feature importances and save them as a PNG.

    Uses ``self.best_model`` when set, otherwise the experiment's
    ``"best_model"`` config entry. Importances come from
    ``feature_importances_`` or, failing that, the absolute values of
    ``coef_``. A 2-D ``coef_`` (one row per class or target) is averaged
    across rows so that there is one value per feature.

    On success the plot is written to ``<output_dir>/tree_importance.png``,
    its path is stored in ``self.plots["tree_importance"]`` and
    ``self.tree_model_name`` holds the model's class name. If the model
    exposes neither attribute, or the number of importances does not match
    the number of transformed features, a warning is logged,
    ``self.tree_model_name`` is set to ``None`` and nothing is written.
    """
    # Explicit None check: estimators may define __len__, which makes plain
    # truthiness unreliable (or even raising) for a valid model object.
    model = (
        self.best_model
        if self.best_model is not None
        else self.exp.get_config("best_model")
    )
    processed_features = self.exp.get_config("X_transformed").columns

    model_type = model.__class__.__name__
    self.tree_model_name = model_type

    if hasattr(model, "feature_importances_"):
        importances = model.feature_importances_
    elif hasattr(model, "coef_"):
        importances = abs(model.coef_)
        # Flattening a (n_classes, n_features) matrix would give
        # n_classes * n_features values and always fail the length check
        # below, so collapse the rows to one mean magnitude per feature.
        if getattr(importances, "ndim", 1) > 1:
            importances = importances.mean(axis=0)
        importances = importances.flatten()
    else:
        LOG.warning(
            "Model %s does not have feature_importances_ or coef_. "
            "Skipping tree importance.",
            model_type,
        )
        self.tree_model_name = None
        return

    if len(importances) != len(processed_features):
        LOG.warning(
            "Importances (%d) != features (%d). Skipping tree importance.",
            len(importances),
            len(processed_features),
        )
        self.tree_model_name = None
        return

    feature_importances = pd.DataFrame(
        {"Feature": processed_features, "Importance": importances}
    ).sort_values(by="Importance", ascending=False)

    # Make sure the destination exists before matplotlib tries to write.
    if self.output_dir:
        os.makedirs(self.output_dir, exist_ok=True)
    plot_path = os.path.join(self.output_dir, "tree_importance.png")

    plt.figure(figsize=(10, 6))
    try:
        plt.barh(feature_importances["Feature"], feature_importances["Importance"])
        plt.xlabel("Importance")
        plt.title(f"Feature Importance ({model_type})")
        plt.savefig(plot_path, bbox_inches="tight")
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close()
    self.plots["tree_importance"] = plot_path
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
105
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
106 def save_shap_values(self, max_samples=None, max_display=None, max_features=None):
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
107 model = self.best_model or self.exp.get_config("best_model")
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
108
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
109 X_data = None
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
110 for key in ("X_test_transformed", "X_train_transformed"):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
111 try:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
112 X_data = self.exp.get_config(key)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
113 break
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
114 except KeyError:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
115 continue
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
116 if X_data is None:
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
117 raise RuntimeError("No transformed dataset found for SHAP.")
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
118
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
119 # --- Adaptive feature limiting (proportional cap) ---
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
120 n_rows, n_features = X_data.shape
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
121 if max_features is None:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
122 if n_features <= 200:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
123 max_features = n_features
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
124 else:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
125 max_features = min(200, max(20, int(n_features * 0.1)))
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
126
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
127 try:
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
128 if hasattr(model, "feature_importances_"):
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
129 importances = pd.Series(
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
130 model.feature_importances_, index=X_data.columns
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
131 )
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
132 top_features = importances.nlargest(max_features).index
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
133 elif hasattr(model, "coef_"):
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
134 coef = abs(model.coef_).flatten()
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
135 importances = pd.Series(coef, index=X_data.columns)
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
136 top_features = importances.nlargest(max_features).index
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
137 else:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
138 variances = X_data.var()
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
139 top_features = variances.nlargest(max_features).index
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
140
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
141 if len(top_features) < n_features:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
142 LOG.info(
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
143 f"Restricted SHAP computation to top {len(top_features)} / {n_features} features"
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
144 )
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
145 X_data = X_data[top_features]
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
146 except Exception as e:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
147 LOG.warning(
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
148 f"Feature limiting failed: {e}. Using all {n_features} features."
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
149 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
150
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
151 # --- Adaptive row subsampling ---
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
152 if max_samples is None:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
153 if n_rows <= 500:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
154 max_samples = n_rows
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
155 elif n_rows <= 5000:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
156 max_samples = 500
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
157 else:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
158 max_samples = min(1000, int(n_rows * 0.1))
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
159
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
160 if n_rows > max_samples:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
161 LOG.info(f"Subsampling SHAP rows: {max_samples} of {n_rows}")
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
162 X_data = X_data.sample(max_samples, random_state=42)
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
163
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
164 # --- Adaptive feature display ---
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
165 if max_display is None:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
166 if X_data.shape[1] <= 20:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
167 max_display = X_data.shape[1]
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
168 elif X_data.shape[1] <= 100:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
169 max_display = 30
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
170 else:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
171 max_display = 50
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
172
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
173 # Background set
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
174 bg = X_data.sample(min(len(X_data), 100), random_state=42)
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
175 predict_fn = (
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
176 model.predict_proba if hasattr(model, "predict_proba") else model.predict
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
177 )
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
178
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
179 # Optimized explainer
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
180 if hasattr(model, "feature_importances_"):
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
181 explainer = shap.TreeExplainer(
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
182 model, bg, feature_perturbation="tree_path_dependent", n_jobs=-1
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
183 )
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
184 elif hasattr(model, "coef_"):
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
185 explainer = shap.LinearExplainer(model, bg)
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
186 else:
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
187 explainer = shap.Explainer(predict_fn, bg)
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
188
7
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
189 try:
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
190 shap_values = explainer(X_data)
7
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
191 self.shap_model_name = explainer.__class__.__name__
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
192 except Exception as e:
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
193 LOG.error(f"SHAP computation failed: {e}")
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
194 self.shap_model_name = None
0afd970bd8ae planemo upload for repository https://github.com/goeckslab/gleam commit 55deacbbc78a00f27d789e11d563ba49dfb9cf9e
goeckslab
parents: 4
diff changeset
195 return
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
196
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
197 # --- Plot SHAP summary ---
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
198 out_path = os.path.join(self.output_dir, "shap_summary.png")
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
199 plt.figure()
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
200 shap.plots.beeswarm(shap_values, max_display=max_display, show=False)
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
201 plt.title(
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
202 f"SHAP Summary for {model.__class__.__name__} (top {max_display} features)"
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
203 )
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
204 plt.savefig(out_path, bbox_inches="tight")
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
205 plt.close()
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
206 self.plots["shap_summary"] = out_path
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
207
8
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
208 # --- Log summary ---
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
209 LOG.info(
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
210 f"SHAP summary completed with {X_data.shape[0]} rows and {X_data.shape[1]} features (displaying top {max_display})."
ba45bc057d70 planemo upload for repository https://github.com/goeckslab/gleam commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 7
diff changeset
211 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
212
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
def generate_html_report(self):
    """Build the HTML fragment that embeds every saved plot.

    Iterates over ``self.plots`` (plot name -> image path), base64-encodes
    each image via ``self.encode_image_to_base64`` and wraps it in a
    ``<div class="plot">`` section with an ``<h2>`` heading.

    The ``tree_importance`` plot is skipped when ``self.tree_model_name`` is
    missing or falsy.

    Returns:
        str: The concatenated HTML sections, or an empty string when there
        is nothing to show.
    """
    LOG.info("Generating HTML report")

    # Resolve both model names once instead of on every loop iteration.
    tree_model_name = getattr(self, "tree_model_name", None)
    # ``getattr``'s default only applies when the attribute is absent.
    # ``shap_model_name`` may exist but be None (it is reset to None when the
    # SHAP computation fails), so fall back with ``or`` to avoid rendering
    # "SHAP Summary from None".
    shap_model_name = getattr(self, "shap_model_name", None) or "model"

    sections = []
    for plot_name, plot_path in self.plots.items():
        if plot_name == "tree_importance":
            if not tree_model_name:
                # No tree model was recorded: skip before touching the file.
                continue
            section_title = f"Feature importance from {tree_model_name}"
        elif plot_name == "shap_summary":
            section_title = f"SHAP Summary from {shap_model_name}"
        else:
            section_title = plot_name

        encoded_image = self.encode_image_to_base64(plot_path)
        sections.append(
            f"""
            <div class="plot" id="{plot_name}">
                <h2>{section_title}</h2>
                <img src="data:image/png;base64,{encoded_image}" alt="{plot_name}">
            </div>
            """
        )

    # Join once at the end rather than growing a string inside the loop.
    return "".join(sections)
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
239
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
def encode_image_to_base64(self, img_path):
    """Return the contents of the file at ``img_path`` as base64 text.

    The file is read in binary mode and the base64 bytes are decoded as
    UTF-8, so the result can be embedded directly in a ``data:`` URI.
    """
    with open(img_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
243
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
def run(self):
    """Run the full feature-importance pipeline and return the HTML report.

    Calls ``self.setup_pycaret()`` first unless ``self.exp`` exists and
    reports a truthy ``is_setup``; then saves the tree-importance and SHAP
    outputs and returns the result of ``self.generate_html_report()``.
    """
    experiment = self.exp
    already_set_up = (
        experiment is not None
        and hasattr(experiment, "is_setup")
        and experiment.is_setup
    )
    if not already_set_up:
        self.setup_pycaret()

    self.save_tree_importance()
    self.save_shap_values()

    report_html = self.generate_html_report()
    return report_html