Mercurial > repos > goeckslab > pycaret_predict
annotate base_model_trainer.py @ 16:4fee4504646e draft default tip
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
| author | goeckslab |
|---|---|
| date | Fri, 28 Nov 2025 22:28:26 +0000 |
| parents | 7d78a6afc958 |
| children |
| rev | line source |
|---|---|
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
1 import base64 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
2 import logging |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
3 import tempfile |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
4 from pathlib import Path |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
5 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
6 import h5py |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
7 import joblib |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
8 import numpy as np |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
9 import pandas as pd |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
10 from feature_help_modal import get_feature_metrics_help_modal |
|
3
ccd798db5abb
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
goeckslab
parents:
2
diff
changeset
|
11 from feature_importance import FeatureImportanceAnalyzer |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
12 from sklearn.metrics import average_precision_score |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
13 from utils import ( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
14 add_hr_to_html, |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
15 add_plot_to_html, |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
16 build_tabbed_html, |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
17 encode_image_to_base64, |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
18 get_html_closing, |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
19 get_html_template, |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
20 ) |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
21 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
22 logging.basicConfig(level=logging.DEBUG) |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
23 LOG = logging.getLogger(__name__) |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
24 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
25 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
26 class BaseModelTrainer: |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def __init__(
    self,
    input_file,
    target_col,
    output_dir,
    task_type,
    random_seed,
    test_file=None,
    **kwargs,
):
    """Store the run configuration and initialise empty result holders.

    Args:
        input_file: Path of the training table (read later by ``load_data``).
        target_col: Target column number; ``load_data`` interprets it as a
            1-based position.
        output_dir: Output location; must be truthy.
        task_type: Task name. ``"regression"`` triggers a warning when
            ``probability_threshold`` is supplied.
        random_seed: Seed value stored on the instance.
        test_file: Optional path of a separate test table.
        **kwargs: Extra options. A copy is kept in ``user_kwargs`` and every
            entry is also set as an instance attribute.

    Raises:
        ValueError: If ``output_dir`` is empty or ``None``.
    """
    default_plot_limit = 30
    polynomial_plot_cap = 15
    polynomial_shap_rows = 200

    self.exp = None
    self.input_file = input_file
    self.target_col = target_col
    self.output_dir = output_dir
    self.task_type = task_type
    self.random_seed = random_seed
    self.data = None
    self.target = None
    self.best_model = None
    self.results = None
    self.tuning_results = None
    self.features_name = None
    self.plot_feature_names = None
    self.plots = {}
    self.explainer_plots = {}
    self.plots_explainer_html = None
    self.trees = []

    # User options become attributes. Anything assigned above can be
    # overridden by a same-named kwarg; anything assigned below this loop
    # overrides the kwarg instead.
    self.user_kwargs = kwargs.copy()
    for key, value in self.user_kwargs.items():
        setattr(self, key, value)

    if not hasattr(self, "plot_feature_limit"):
        self.plot_feature_limit = default_plot_limit
    self._shap_row_cap = None

    if getattr(self, "polynomial_features", False):
        # Keep feature importance responsive by trimming plots/SHAP rows.
        try:
            limit_val = int(self.plot_feature_limit)
        except (TypeError, ValueError):
            limit_val = default_plot_limit
        # _select_plot_features treats a non-positive limit as "no limit".
        # Passing such a value through min() would silently switch the cap
        # off, so fall back to the polynomial cap instead.
        if limit_val <= 0:
            limit_val = polynomial_plot_cap
        self.plot_feature_limit = min(limit_val, polynomial_plot_cap)
        self._shap_row_cap = polynomial_shap_rows
        LOG.info(
            "Polynomial features enabled; limiting feature plots to %s and SHAP rows to %s",
            self.plot_feature_limit,
            self._shap_row_cap,
        )

    self.imputed_training_data = None
    self._best_model_metric_used = None
    self.setup_params = {}
    self.test_file = test_file
    self.test_data = None

    if not self.output_dir:
        raise ValueError(
            "output_dir must be specified and not None"
        )

    # Warn about irrelevant kwargs for the task type.
    if (
        self.task_type == "regression"
        and "probability_threshold" in self.user_kwargs
    ):
        LOG.warning(
            "probability_threshold is ignored for regression tasks."
        )

    LOG.info("Model kwargs: %s", self.__dict__)
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
93 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def load_data(self):
    """Read the training table (and optional test table) into DataFrames.

    The input file is parsed with delimiter sniffing (``sep=None`` with the
    python engine). Dots in column names are replaced by underscores, the
    target column is resolved from the 1-based ``self.target_col``, and a
    ``prediction_label`` column is dropped unless it is the target.

    Sets ``self.data``, ``self.target``, ``self.features_name`` and
    ``self.plot_feature_names``; sets ``self.test_data`` when
    ``self.test_file`` is truthy.

    Raises:
        ValueError: If the target column number is outside
            ``1..number_of_columns`` (``int()`` may also raise if
            ``self.target_col`` is not convertible).
    """

    def read_table(path):
        # regex=False pins a literal "." match: "." is a regex wildcard and
        # the pandas default for ``regex`` changed between major versions.
        frame = pd.read_csv(path, sep=None, engine="python")
        frame.columns = frame.columns.str.replace(".", "_", regex=False)
        return frame

    LOG.info("Loading data from %s", self.input_file)
    self.data = read_table(self.input_file)

    names = self.data.columns.to_list()
    LOG.info("Original dataset columns: %s", names)

    target_index = int(self.target_col) - 1
    num_cols = len(names)
    if target_index < 0 or target_index >= num_cols:
        raise ValueError(
            f"Target column number {self.target_col} is invalid. "
            f"Please select a number between 1 and {num_cols}."
        )

    self.target = names[target_index]

    # Conditional drop: only if 'prediction_label' exists and is not
    # the target.
    if self.target == "prediction_label":
        LOG.warning(
            "Using 'prediction_label' as target column. "
            "This may not be intended if it's a previous prediction."
        )
    elif "prediction_label" in self.data.columns:
        LOG.info(
            "Dropping 'prediction_label' column as it's not the target."
        )
        self.data = self.data.drop(columns=["prediction_label"])

    numeric_cols = self.data.select_dtypes(include=["number"]).columns
    non_numeric_cols = self.data.select_dtypes(exclude=["number"]).columns
    self.data[numeric_cols] = self.data[numeric_cols].apply(
        pd.to_numeric, errors="coerce"
    )
    if len(non_numeric_cols) > 0:
        LOG.info(
            "Non-numeric columns found: %s", non_numeric_cols.tolist()
        )

    # Update names after possible drop.
    names = self.data.columns.to_list()
    LOG.info("Dataset columns after processing: %s", names)

    self.features_name = [n for n in names if n != self.target]
    self.plot_feature_names = self._select_plot_features(self.features_name)

    if self.test_file:
        LOG.info("Loading test data from %s", self.test_file)
        self.test_data = read_table(self.test_file)
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
158 |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
def _select_plot_features(self, all_features):
    """Pick the features that should receive per-feature plots.

    The cap is read from ``self.plot_feature_limit`` (30 when the attribute
    is absent). When the cap is not a positive integer, or when there are
    no more candidates than the cap, ``all_features`` is returned unchanged.

    Otherwise numeric columns of ``self.data`` are ranked by variance,
    highest first (a NaN variance counts as 0), and the top ``limit`` are
    kept. If fewer than ``limit`` numeric columns exist, the remaining
    slots are filled with the other features in their ``all_features``
    order.

    Args:
        all_features: Column labels of ``self.data`` that are candidates
            for plotting.

    Returns:
        ``all_features`` itself when no limiting is applied, otherwise a
        new list holding at most ``limit`` labels.
    """
    limit = getattr(self, "plot_feature_limit", 30)
    # bool is a subclass of int; a True/False limit is a misconfiguration,
    # not a request to plot exactly one feature.
    if isinstance(limit, bool) or not isinstance(limit, int) or limit <= 0:
        LOG.info(
            "Feature plotting limit disabled (plot_feature_limit=%s).", limit
        )
        return all_features
    if len(all_features) <= limit:
        LOG.info(
            "Feature plotting limit not needed (%s features <= limit %s).",
            len(all_features),
            limit,
        )
        return all_features

    numeric = self.data[all_features].select_dtypes(include=["number"])
    ranked = []
    if numeric.shape[1] > 0:
        variances = numeric.var().fillna(0)
        # Python's sort is stable (also with reverse=True), so columns with
        # equal variance keep their column order and the selection is
        # reproducible from run to run.
        by_variance = sorted(
            variances.items(), key=lambda item: item[1], reverse=True
        )
        ranked = [label for label, _ in by_variance]

    selected = ranked[:limit]
    if len(selected) < limit:
        # Not enough numeric columns: top up with the remaining features.
        # A set keeps the "already chosen" test O(1) per candidate.
        chosen = set(selected)
        for col in all_features:
            if col in chosen:
                continue
            selected.append(col)
            chosen.add(col)
            if len(selected) >= limit:
                break
    LOG.info(
        "Limiting feature-level plots to %s of %s available features (limit=%s).",
        len(selected),
        len(all_features),
        limit,
    )
    return selected
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
204 |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def setup_pycaret(self):
    """Build the PyCaret setup parameters and initialise the experiment.

    Populates ``self.setup_params`` with fixed defaults plus any optional
    attributes set on the instance, creates ``self.exp`` as a
    ``ClassificationExperiment`` or ``RegressionExperiment`` according to
    ``self.task_type``, calls ``self.exp.setup`` on ``self.data`` and then
    caches the processed training data.

    Raises:
        ValueError: If ``self.task_type`` is neither ``"classification"``
            nor ``"regression"``.
    """
    LOG.info("Initializing PyCaret")
    self.setup_params = {
        "target": self.target,
        "session_id": self.random_seed,
        "html": True,
        "log_experiment": False,
        "system_log": False,
        "index": False,
    }
    if self.test_data is not None:
        self.setup_params["test_data"] = self.test_data

    # Optional settings are forwarded only when they are set on the instance.
    optional_attrs = (
        "train_size",
        "normalize",
        "feature_selection",
        "remove_outliers",
        "remove_multicollinearity",
        "polynomial_features",
        "feature_interaction",
        "feature_ratio",
        "fix_imbalance",
        "n_jobs",
    )
    for attr in optional_attrs:
        val = getattr(self, attr, None)
        if val is not None:
            self.setup_params[attr] = val
    if getattr(self, "cross_validation_folds", None) is not None:
        self.setup_params["fold"] = self.cross_validation_folds

    # Log a summary instead of the raw dict so the test DataFrame's repr
    # does not end up in the log output.
    loggable_params = dict(self.setup_params)
    if "test_data" in loggable_params:
        loggable_params["test_data"] = "<test data, shape=%s>" % (
            getattr(self.test_data, "shape", "unknown"),
        )
    LOG.info("PyCaret setup parameters: %s", loggable_params)

    if self.task_type == "classification":
        from pycaret.classification import ClassificationExperiment

        self.exp = ClassificationExperiment()
    elif self.task_type == "regression":
        from pycaret.regression import RegressionExperiment

        self.exp = RegressionExperiment()
    else:
        raise ValueError(
            "task_type must be 'classification' or 'regression'"
        )

    self.exp.setup(self.data, **self.setup_params)
    self._capture_imputed_training_data()
    # The user kwargs are merged only after setup() has run, so they are
    # recorded in setup_params but are not passed to PyCaret here.
    # NOTE(review): presumably kept for reporting - confirm against callers.
    self.setup_params.update(self.user_kwargs)
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
252 |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
def _capture_imputed_training_data(self):
    """Cache the dataset as transformed/imputed by PyCaret.

    Reads ``X_transformed`` and ``y`` from the experiment configuration,
    renumbers both from zero and stores them side by side in
    ``self.imputed_training_data`` (target column named ``self.target``)
    so downstream components (e.g., feature importance) can work on the
    processed data.

    This is best effort: on any failure, including a row-count mismatch
    between features and target, a warning is logged and
    ``self.imputed_training_data`` is set to ``None``. Nothing happens
    when ``self.exp`` is ``None``.
    """
    if self.exp is None:
        return
    try:
        # reset_index returns a new frame, so PyCaret's own object is
        # never modified and no separate copy is required.
        X_processed = self.exp.get_config("X_transformed").reset_index(
            drop=True
        )
        y_processed = self.exp.get_config("y")
        if isinstance(y_processed, pd.Series):
            y_series = y_processed.reset_index(drop=True)
        else:
            y_series = pd.Series(y_processed)
        y_series.name = self.target

        # concat(axis=1) aligns on the index and would silently pad the
        # shorter side with NaN, so reject mismatched lengths up front.
        if len(X_processed) != len(y_series):
            raise ValueError(
                "row count mismatch between features (%d) and target (%d)"
                % (len(X_processed), len(y_series))
            )

        self.imputed_training_data = pd.concat(
            [X_processed, y_series], axis=1
        )
        LOG.info(
            "Captured imputed training dataset from PyCaret "
            "(%s rows, %s features).",
            self.imputed_training_data.shape[0],
            self.imputed_training_data.shape[1] - 1,
        )
    except Exception as exc:
        # Deliberately broad: capturing this cache must never abort setup.
        LOG.warning(
            "Unable to capture processed training data from PyCaret: %s",
            exc,
        )
        self.imputed_training_data = None
|
13
f07850192bc2
planemo upload for repository https://github.com/goeckslab/gleam commit 84d5cd0b1fa5c1ff0ad892bc39c95dad1ceb4920
goeckslab
parents:
10
diff
changeset
|
285 |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
286 def train_model(self): |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
287 LOG.info("Training and selecting the best model") |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
288 if self.task_type == "classification": |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
289 self.exp.add_metric( |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
290 id="PR-AUC-Weighted", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
291 name="PR-AUC-Weighted", |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
292 target="pred_proba", |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
293 score_func=average_precision_score, |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
294 average="weighted", |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
295 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
296 # Build arguments for compare_models() |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
297 compare_kwargs = {} |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
298 if getattr(self, "models", None): |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
299 compare_kwargs["include"] = self.models |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
300 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
301 # Respect explicit cross-validation flag |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
302 if getattr(self, "cross_validation", None) is not None: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
303 compare_kwargs["cross_validation"] = self.cross_validation |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
304 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
305 # Respect explicit fold count |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
306 if getattr(self, "cross_validation_folds", None) is not None: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
307 compare_kwargs["fold"] = self.cross_validation_folds |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
308 |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
309 best_metric = getattr(self, "best_model_metric", None) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
310 if best_metric: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
311 compare_kwargs["sort"] = best_metric |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
312 self._best_model_metric_used = best_metric |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
313 LOG.info(f"Ranking models using metric: {best_metric}") |
|
13
f07850192bc2
planemo upload for repository https://github.com/goeckslab/gleam commit 84d5cd0b1fa5c1ff0ad892bc39c95dad1ceb4920
goeckslab
parents:
10
diff
changeset
|
314 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
315 LOG.info(f"compare_models kwargs: {compare_kwargs}") |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
316 self.best_model = self.exp.compare_models(**compare_kwargs) |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
317 if self._best_model_metric_used is None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
318 self._best_model_metric_used = getattr(self.exp, "_fold_metric", None) |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
319 self.results = self.exp.pull() |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
320 if getattr(self, "tune_model", False): |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
321 LOG.info("Tuning hyperparameters of the best model") |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
322 self.best_model = self.exp.tune_model(self.best_model) |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
323 self.tuning_results = self.exp.pull() |
|
7
f4cb41f458fd
planemo upload for repository https://github.com/goeckslab/gleam commit b430f8b466655878c3bf63b053655fdbf039ddb0
goeckslab
parents:
6
diff
changeset
|
324 |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
325 if self.task_type == "classification": |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
326 self.results.rename(columns={"AUC": "ROC-AUC"}, inplace=True) |
|
9
c6c1f8777aae
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
8
diff
changeset
|
327 |
|
c6c1f8777aae
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
8
diff
changeset
|
328 prob_thresh = getattr(self, "probability_threshold", None) |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
329 if self.task_type == "classification" and ( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
330 prob_thresh is not None |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
331 ): |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
332 _ = self.exp.predict_model( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
333 self.best_model, probability_threshold=prob_thresh |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
334 ) |
|
9
c6c1f8777aae
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
8
diff
changeset
|
335 else: |
|
c6c1f8777aae
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
8
diff
changeset
|
336 _ = self.exp.predict_model(self.best_model) |
|
c6c1f8777aae
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
8
diff
changeset
|
337 |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
338 self.test_result_df = self.exp.pull() |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
339 if self.task_type == "classification": |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
340 self.test_result_df.rename( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
341 columns={"AUC": "ROC-AUC"}, inplace=True |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
342 ) |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
343 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def save_model(self):
    """Serialize ``self.best_model`` into ``<output_dir>/pycaret_model.h5``.

    The model is dumped with joblib to a scratch file, and the resulting
    bytes are stored as an opaque ``np.void`` value in the HDF5 dataset
    named ``"model"``.
    """
    hdf5_path = Path(self.output_dir) / "pycaret_model.h5"

    # Dump inside a temporary directory so the scratch file is always
    # removed, even when serialization raises (the previous
    # NamedTemporaryFile(delete=False) was never cleaned up).
    with tempfile.TemporaryDirectory() as scratch_dir:
        dump_path = Path(scratch_dir) / "best_model.joblib"
        joblib.dump(self.best_model, dump_path)
        model_bytes = dump_path.read_bytes()

    # Open the HDF5 file only after serialization succeeded, so a failed
    # dump does not leave a truncated/empty output file behind.
    with h5py.File(hdf5_path, "w") as f:
        f.create_dataset("model", data=np.void(model_bytes))
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
352 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def generate_plots(self):
    """Render PyCaret diagnostic plots for ``self.best_model`` as PNG files.

    The set of plots depends on ``self.task_type``. Each plot is written to
    ``<output_dir>/plot_<name>.png`` and its path is recorded in
    ``self.plots[name]``. A plot that fails to render is logged as a
    warning and skipped; it never aborts the remaining plots.
    """
    LOG.info("Generating PyCaret diagnostic plots")

    # choose the right plots based on task type
    if self.task_type == "classification":
        plot_names = [
            "learning",
            "vc",
            "calibration",
            "dimension",
            "manifold",
            "rfe",
            "threshold",
            "percentage_above_below",
            "class_report",
            "pr_auc",
            "roc_auc",
        ]
    else:
        plot_names = ["residuals", "vc", "parameter", "error",
                      "learning"]
    for name in plot_names:
        try:
            # NOTE(review): this relies on plot_model(save=False) returning
            # an object exposing get_figure(); if it returns None the
            # AttributeError is caught below and the plot is skipped.
            # TODO confirm against the PyCaret version in use.
            ax = self.exp.plot_model(
                self.best_model, plot=name, save=False
            )
            out_path = Path(self.output_dir) / f"plot_{name}.png"
            fig = ax.get_figure()
            fig.savefig(out_path, bbox_inches="tight")
            self.plots[name] = str(out_path)
        except Exception as e:
            # Deliberate best-effort: one failing plot must not stop the rest.
            LOG.warning(f"Could not generate {name} plot: {e}")
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
385 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
def encode_image_to_base64(self, img_path: str) -> str:
    """Return the raw bytes of the file at ``img_path`` as base64 text."""
    with open(img_path, "rb") as handle:
        raw = handle.read()
    encoded = base64.b64encode(raw)
    return encoded.decode("utf-8")
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
389 |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
390 def _resolve_plot_callable(self, key, fig_or_fn, section): |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
391 """ |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
392 Safely execute stored plot callables so a single failure does not |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
393 abort the entire HTML report generation. |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
394 """ |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
395 if fig_or_fn is None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
396 return None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
397 try: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
398 return fig_or_fn() if callable(fig_or_fn) else fig_or_fn |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
399 except Exception as exc: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
400 extra = "" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
401 if isinstance(exc, ValueError) and "Input contains NaN" in str(exc): |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
402 extra = ( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
403 " (model returned NaN probabilities; " |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
404 "consider checking data preprocessing)" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
405 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
406 LOG.warning( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
407 "Skipping %s plot '%s' due to error: %s%s", |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
408 section, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
409 key, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
410 exc, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
411 extra, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
412 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
413 return None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
414 |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
415 def save_html_report(self): |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
416 LOG.info("Saving HTML report") |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
417 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
418 # 1) Determine best model name |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
419 try: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
420 best_model_name = str(self.results.iloc[0]["Model"]) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
421 except Exception: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
422 best_model_name = type(self.best_model).__name__ |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
423 LOG.info(f"Best model determined as: {best_model_name}") |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
424 |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
425 # 2) Compute training sample count |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
426 try: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
427 n_train = self.exp.X_train.shape[0] |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
428 except Exception: |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
429 n_train = getattr( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
430 self.exp, "X_train_transformed", pd.DataFrame() |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
431 ).shape[0] |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
432 total_rows = self.data.shape[0] |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
433 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
434 # 3) Build setup parameters table |
|
9
c6c1f8777aae
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
8
diff
changeset
|
435 all_params = self.setup_params.copy() |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
436 if self.task_type == "classification" and ( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
437 hasattr(self, "probability_threshold") |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
438 ): |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
439 all_params["probability_threshold"] = ( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
440 self.probability_threshold |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
441 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
442 display_keys = [ |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
443 "Target", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
444 "Session ID", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
445 "Train Size", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
446 "Normalize", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
447 "Feature Selection", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
448 "Cross Validation", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
449 "Cross Validation Folds", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
450 "Remove Outliers", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
451 "Remove Multicollinearity", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
452 "Polynomial Features", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
453 "Fix Imbalance", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
454 "Models", |
|
9
c6c1f8777aae
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
8
diff
changeset
|
455 "Probability Threshold", |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
456 ] |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
457 setup_rows = [] |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
458 for key in display_keys: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
459 pk = key.lower().replace(" ", "_") |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
460 v = all_params.get(pk) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
461 if key == "Train Size": |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
462 frac = ( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
463 float(v) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
464 if v is not None |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
465 else (n_train / total_rows if total_rows else 0) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
466 ) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
467 dv = f"{frac:.2f} ({n_train} rows)" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
468 elif key in { |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
469 "Normalize", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
470 "Feature Selection", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
471 "Cross Validation", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
472 "Remove Outliers", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
473 "Remove Multicollinearity", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
474 "Polynomial Features", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
475 "Fix Imbalance", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
476 }: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
477 dv = bool(v) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
478 elif key == "Cross Validation Folds": |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
479 dv = v if v is not None else "None" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
480 elif key == "Models": |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
481 dv = ", ".join(map(str, v)) if isinstance( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
482 v, (list, tuple) |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
483 ) else "None" |
|
9
c6c1f8777aae
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
8
diff
changeset
|
484 elif key == "Probability Threshold": |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
485 dv = f"{v:.2f}" if v is not None else "0.5" |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
486 else: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
487 dv = v if v is not None else "None" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
488 setup_rows.append([key, dv]) |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
489 metric_label = self._best_model_metric_used or getattr( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
490 self.exp, "_fold_metric", None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
491 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
492 if metric_label: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
493 setup_rows.append(["Best Model Metric", metric_label]) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
494 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
495 df_setup = pd.DataFrame(setup_rows, columns=["Parameter", "Value"]) |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
496 df_setup.to_csv( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
497 Path(self.output_dir) / "setup_params.csv", index=False |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
498 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
499 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
500 # 4) Persist CSVs |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
501 self.results.to_csv( |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
502 Path(self.output_dir) / "comparison_results.csv", |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
503 index=False |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
504 ) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
505 self.test_result_df.to_csv( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
506 Path(self.output_dir) / "test_results.csv", index=False |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
507 ) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
508 pd.DataFrame( |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
509 self.best_model.get_params().items(), |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
510 columns=["Parameter", "Value"] |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
511 ).to_csv(Path(self.output_dir) / "best_model.csv", index=False) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
512 |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
513 if self.tuning_results is not None: |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
514 self.tuning_results.to_csv( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
515 Path(self.output_dir) / "tuning_results.csv", |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
516 index=False |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
517 ) |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
518 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
519 # 5) Header |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
520 header = f"<h2>Best Model: {best_model_name}</h2>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
521 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
522 # — Validation Summary & Configuration — |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
523 val_df = self.results.copy() |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
524 # mapping raw plot keys to user-friendly titles |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
525 plot_title_map = { |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
526 "learning": "Learning Curve", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
527 "vc": "Validation Curve", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
528 "calibration": "Calibration Curve", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
529 "dimension": "Dimensionality Reduction", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
530 "manifold": "Manifold Learning", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
531 "rfe": "Recursive Feature Elimination", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
532 "threshold": "Threshold Plot", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
533 "percentage_above_below": "Percentage Above vs. Below Cutoff", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
534 "class_report": "Classification Report", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
535 "pr_auc": "Precision-Recall AUC", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
536 "roc_auc": "Receiver Operating Characteristic AUC", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
537 "residuals": "Residuals Distribution", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
538 "error": "Prediction Error Distribution", |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
539 } |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
540 val_df.drop( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
541 columns=["TT (Ec)", "TT (Sec)"], errors="ignore", inplace=True |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
542 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
543 summary_html = ( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
544 header |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
545 + "<h2>Train & Validation Summary</h2>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
546 + '<div class="table-wrapper">' |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
547 + val_df.to_html(index=False, classes="table sortable") |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
548 + "</div>" |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
549 ) |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
550 |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
551 if self.tuning_results is not None: |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
552 tuning_df = self.tuning_results.copy() |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
553 tuning_df.drop( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
554 columns=["TT (Sec)"], errors="ignore", inplace=True |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
555 ) |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
556 summary_html += ( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
557 f"<h2>{best_model_name}: Tuning Summary</h2>" |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
558 + '<div class="table-wrapper">' |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
559 + tuning_df.to_html(index=False, classes="table sortable") |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
560 + "</div>" |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
561 ) |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
562 |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
563 summary_html += ( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
564 "<h2>Setup Parameters</h2>" |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
565 + '<div class="table-wrapper">' |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
566 + df_setup.to_html(index=False, classes="table sortable") |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
567 + "</div>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
568 # — Hyperparameters |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
569 + "<h2>Best Model Hyperparameters</h2>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
570 + '<div class="table-wrapper">' |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
571 + pd.DataFrame( |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
572 self.best_model.get_params().items(), |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
573 columns=["Parameter", "Value"] |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
574 ).to_html(index=False, classes="table sortable") |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
575 + "</div>" |
|
3
ccd798db5abb
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
goeckslab
parents:
2
diff
changeset
|
576 ) |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
577 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
578 # choose summary plots based on task type |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
579 if self.task_type == "classification": |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
580 summary_plots = [ |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
581 "learning", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
582 "vc", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
583 "calibration", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
584 "dimension", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
585 "manifold", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
586 "rfe", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
587 "threshold", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
588 "percentage_above_below", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
589 ] |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
590 else: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
591 summary_plots = ["learning", "vc", "parameter", "residuals"] |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
592 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
593 for name in summary_plots: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
594 if name in self.plots: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
595 summary_html += "<hr>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
596 b64 = encode_image_to_base64(self.plots[name]) |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
597 title = plot_title_map.get( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
598 name, name.replace("_", " ").title() |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
599 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
600 summary_html += ( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
601 '<div class="plot">' |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
602 f"<h2>{title}</h2>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
603 f'<img src="data:image/png;base64,{b64}" ' |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
604 'style="max-width:90%;max-height:600px;' |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
605 'border:1px solid #ddd;"/>' |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
606 "</div>" |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
607 ) |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
608 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
609 # — Test Summary — |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
610 test_html = ( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
611 header |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
612 + '<div class="table-wrapper">' |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
613 + self.test_result_df.to_html( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
614 index=False, classes="table sortable" |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
615 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
616 + "</div>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
617 ) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
618 if self.task_type == "regression": |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
619 try: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
620 y_true = ( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
621 pd.Series(self.exp.y_test_transformed) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
622 .reset_index(drop=True) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
623 .rename("True") |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
624 ) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
625 y_pred = pd.Series( |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
626 self.best_model.predict( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
627 self.exp.X_test_transformed |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
628 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
629 ).rename("Predicted") |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
630 df_tp = pd.concat([y_true, y_pred], axis=1) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
631 test_html += "<h2>True vs Predicted Values</h2>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
632 test_html += ( |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
633 '<div class="table-wrapper" ' |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
634 'style="max-height:400px; overflow-y:auto;">' |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
635 + df_tp.head(50).to_html( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
636 index=False, classes="table sortable" |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
637 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
638 + "</div>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
639 + add_hr_to_html() |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
640 ) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
641 except Exception as e: |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
642 LOG.warning( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
643 f"Could not generate True vs Predicted table: {e}" |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
644 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
645 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
646 # 5a) Explainer-substituted plots in order |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
647 if self.task_type == "regression": |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
648 test_order = ["residuals"] |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
649 else: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
650 test_order = [ |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
651 "confusion_matrix", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
652 "roc_auc", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
653 "pr_auc", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
654 "lift_curve", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
655 "cumulative_precision", |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
656 ] |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
657 for key in test_order: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
658 fig_or_fn = self.explainer_plots.pop(key, None) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
659 if fig_or_fn is not None: |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
660 fig = self._resolve_plot_callable( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
661 key, fig_or_fn, section="test/explainer" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
662 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
663 if fig is None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
664 continue |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
665 title = plot_title_map.get( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
666 key, key.replace("_", " ").title() |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
667 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
668 test_html += ( |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
669 f"<h2>{title}</h2>" + add_plot_to_html(fig) |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
670 + add_hr_to_html() |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
671 ) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
672 # 5b) Remaining PyCaret test plots |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
673 for name, path in self.plots.items(): |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
674 # classification: include only the small extras, before |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
675 # skipping anything |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
676 if self.task_type == "classification" and ( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
677 name in { |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
678 "pr_auc", |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
679 "class_report", |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
680 } |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
681 ): |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
682 title = plot_title_map.get( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
683 name, name.replace("_", " ").title() |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
684 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
685 b64 = encode_image_to_base64(path) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
686 test_html += ( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
687 f"<h2>{title}</h2>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
688 "<div class='plot'>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
689 f"<img src='data:image/png;base64,{b64}' " |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
690 "style='max-width:90%;max-height:600px;" |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
691 "border:1px solid #ddd;'/>" |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
692 "</div>" + add_hr_to_html() |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
693 ) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
694 continue |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
695 |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
696 # regression: explicitly include the 'error' plot, |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
697 # before skipping |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
698 if self.task_type == "regression" and ( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
699 name == "error" |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
700 ): |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
701 title = plot_title_map.get( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
702 "error", "Prediction Error Distribution" |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
703 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
704 b64 = encode_image_to_base64(path) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
705 test_html += ( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
706 f"<h2>{title}</h2>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
707 "<div class='plot'>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
708 f"<img src='data:image/png;base64,{b64}' " |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
709 "style='max-width:90%;max-height:600px;" |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
710 "border:1px solid #ddd;'/>" |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
711 "</div>" + add_hr_to_html() |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
712 ) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
713 continue |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
714 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
715 # now skip any plots already rendered via test_order |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
716 if name in test_order: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
717 continue |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
718 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
719 # — Feature Importance — |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
720 feature_html = header |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
721 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
722 # 6a) PyCaret’s default feature importances |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
723 imputed_data = ( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
724 self.imputed_training_data |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
725 if self.imputed_training_data is not None |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
726 else self.data |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
727 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
728 fi_analyzer = FeatureImportanceAnalyzer( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
729 data=imputed_data, |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
730 target_col=self.target_col, |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
731 task_type=self.task_type, |
|
3
ccd798db5abb
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
goeckslab
parents:
2
diff
changeset
|
732 output_dir=self.output_dir, |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
733 exp=self.exp, |
|
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
734 best_model=self.best_model, |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
735 max_plot_features=self.plot_feature_limit, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
736 processed_data=self.imputed_training_data, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
737 max_shap_rows=self._shap_row_cap, |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
738 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
739 fi_html = fi_analyzer.run() |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
740 # Add a small table to show SHAP feature caps near the Best Model header. |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
741 cap_rows = [] |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
742 if fi_analyzer.shap_total_features is not None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
743 cap_rows.append( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
744 ("Total transformed features", fi_analyzer.shap_total_features) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
745 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
746 if fi_analyzer.shap_used_features is not None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
747 cap_rows.append( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
748 ("Features used in SHAP", fi_analyzer.shap_used_features) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
749 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
750 if cap_rows: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
751 cap_table = ( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
752 "<div class='table-wrapper'>" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
753 "<table class='table sortable'>" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
754 "<thead><tr><th>Feature Importance Scope</th><th>Count</th></tr></thead>" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
755 "<tbody>" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
756 + "".join( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
757 f"<tr><td>{label}</td><td>{value}</td></tr>" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
758 for label, value in cap_rows |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
759 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
760 + "</tbody></table></div>" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
761 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
762 feature_html += cap_table |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
763 feature_html += fi_html |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
764 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
765 # 6b) Explainer SHAP importances |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
766 for key in ["shap_mean", "shap_perm"]: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
767 fig_or_fn = self.explainer_plots.pop(key, None) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
768 if fig_or_fn is not None: |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
769 fig = self._resolve_plot_callable( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
770 key, fig_or_fn, section="feature importance" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
771 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
772 if fig is None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
773 continue |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
774 # give SHAP plots explicit titles |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
775 title = ( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
776 "Mean Absolute SHAP Value Impact" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
777 if key == "shap_mean" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
778 else "Permutation Feature Importance" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
779 ) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
780 feature_html += ( |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
781 f"<h2>{title}</h2>" + add_plot_to_html(fig) |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
782 + add_hr_to_html() |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
783 ) |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
784 |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
785 # 6c) PDPs last |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
786 pdp_keys = sorted( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
787 k for k in self.explainer_plots if k.startswith("pdp__") |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
788 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
789 for k in pdp_keys: |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
790 fig_or_fn = self.explainer_plots[k] |
|
16
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
791 fig = self._resolve_plot_callable( |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
792 k, fig_or_fn, section="pdp" |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
793 ) |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
794 if fig is None: |
|
4fee4504646e
planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents:
14
diff
changeset
|
795 continue |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
796 # extract feature name |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
797 feature = k.split("__", 1)[1] |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
798 title = f"Partial Dependence for {feature}" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
799 feature_html += ( |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
800 f"<h2>{title}</h2>" + add_plot_to_html(fig) |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
801 + add_hr_to_html() |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
802 ) |
|
8
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
803 # 7) Assemble final HTML (three tabs) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
804 html = get_html_template() |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
805 html += "<h1>Tabular Learner Model Report</h1>" |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
806 html += build_tabbed_html(summary_html, test_html, feature_html) |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
807 html += get_feature_metrics_help_modal() |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
808 html += get_html_closing() |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
809 |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
810 # 8) Write out |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
811 (Path(self.output_dir) / "comparison_result.html").write_text( |
|
1aed7d47c5ec
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
7
diff
changeset
|
812 html, encoding="utf-8" |
|
6
a32ff7201629
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
3
diff
changeset
|
813 ) |
|
10
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
814 LOG.info( |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
815 f"HTML report generated at: " |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
816 f"{self.output_dir}/comparison_result.html" |
|
e2a6fed32d54
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
9
diff
changeset
|
817 ) |
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
818 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def save_dashboard(self):
    """Placeholder hook for saving a dashboard.

    This implementation performs no work; it exists so that subclasses
    can override it.

    Raises:
        NotImplementedError: always, with a message asking subclasses to
            provide the implementation.
    """
    message = "Subclasses should implement this method"
    raise NotImplementedError(message)
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
821 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def generate_plots_explainer(self):
    """Placeholder hook for generating explainer plots.

    This implementation performs no work; it exists so that subclasses
    can override it.

    Raises:
        NotImplementedError: always, with a message asking subclasses to
            provide the implementation.
    """
    message = "Subclasses should implement this method"
    raise NotImplementedError(message)
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
824 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def generate_tree_plots(self):
    """Append one encoded decision-tree figure per tree to ``self.trees``.

    Only random-forest and XGBoost estimators are handled. For any other
    type of ``self.best_model`` a warning is logged and the method returns
    without touching ``self.trees``.

    The figures are produced by ``RandomForestExplainer.decisiontree_encoded``
    using the transformed test split of ``self.exp``, always for row
    ``index=0``.
    """
    # Imported lazily, inside the method, exactly as the original did.
    from sklearn.ensemble import (
        RandomForestClassifier, RandomForestRegressor
    )
    from xgboost import XGBClassifier, XGBRegressor
    from explainerdashboard.explainers import RandomForestExplainer

    forest_types = (RandomForestClassifier, RandomForestRegressor)
    boosted_types = (XGBClassifier, XGBRegressor)

    LOG.info("Generating tree plots")
    # The feature frame is copied; the target is passed through as-is.
    features = self.exp.X_test_transformed.copy()
    target = self.exp.y_test_transformed

    model = self.best_model
    is_forest = isinstance(model, forest_types)
    if not is_forest and not isinstance(model, boosted_types):
        LOG.warning("Tree plots not supported for this model type.")
        return

    # Forests expose the tree count directly; for XGBoost the number of
    # entries in the booster dump is used as the tree count.
    tree_count = (
        model.n_estimators
        if is_forest
        else len(model.get_booster().get_dump())
    )

    explainer = RandomForestExplainer(model, features, target)
    for tree_idx in range(tree_count):
        encoded_tree = explainer.decisiontree_encoded(
            tree_idx=tree_idx, index=0
        )
        self.trees.append(encoded_tree)
|
0
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
850 |
|
1f20fe57fdee
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff
changeset
|
def run(self):
    """Execute the full training/reporting pipeline in a fixed order.

    Steps, in order: load data, set up PyCaret, train the model, save the
    model, generate plots, generate explainer plots, generate tree plots,
    and write the HTML report. An exception raised by any step propagates
    and stops the remaining steps.
    """
    # NOTE: the save_dashboard step is currently disabled and therefore
    # deliberately absent from this sequence.
    step_names = (
        "load_data",
        "setup_pycaret",
        "train_model",
        "save_model",
        "generate_plots",
        "generate_plots_explainer",
        "generate_tree_plots",
        "save_html_report",
    )
    for step_name in step_names:
        # Resolve each method only when its turn comes, as a direct
        # ``self.<step>()`` call would.
        getattr(self, step_name)()
