annotate base_model_trainer.py @ 17:c5c324ac29fc draft default tip

planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
author goeckslab
date Sat, 06 Dec 2025 14:20:36 +0000
parents 4fee4504646e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1 import base64
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
2 import logging
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
3 import tempfile
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
4 from pathlib import Path
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
5
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
6 import h5py
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
7 import joblib
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
8 import numpy as np
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
9 import pandas as pd
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
10 from feature_help_modal import get_feature_metrics_help_modal
3
ccd798db5abb planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
goeckslab
parents: 2
diff changeset
11 from feature_importance import FeatureImportanceAnalyzer
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
12 from sklearn.metrics import (
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
13 accuracy_score,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
14 average_precision_score,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
15 confusion_matrix,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
16 f1_score,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
17 matthews_corrcoef,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
18 precision_score,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
19 recall_score,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
20 roc_auc_score,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
21 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
22 from utils import (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
23 add_hr_to_html,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
24 add_plot_to_html,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
25 build_tabbed_html,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
26 encode_image_to_base64,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
27 get_html_closing,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
28 get_html_template,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
29 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
30
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
# Configure logging as soon as this module is imported, requesting DEBUG as
# the root logger level (basicConfig leaves an already-configured root logger
# untouched).
logging.basicConfig(level=logging.DEBUG)
# Module-level logger, named after this module, used by the trainer below.
LOG = logging.getLogger(__name__)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
33
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
34
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
35 class BaseModelTrainer:
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
def __init__(
    self,
    input_file,
    target_col,
    output_dir,
    task_type,
    random_seed,
    test_file=None,
    **kwargs,
):
    """Record the run configuration and reset every per-run result slot.

    Args:
        input_file: Training table location; ``load_data`` hands it to
            ``pd.read_csv``.
        target_col: 1-based position of the target column (``load_data``
            converts it with ``int``).
        output_dir: Output location; must be truthy.
        task_type: Task name; only ``"regression"`` is inspected here.
        random_seed: Seed, stored as ``self.random_seed``.
        test_file: Optional held-out table read by ``load_data``.
        **kwargs: Extra options. A copy is kept in ``self.user_kwargs`` and
            every entry is also set as an instance attribute, so an entry
            overrides any same-named attribute assigned before that point.
            Options read here: ``plot_feature_limit`` (defaults to 30),
            ``polynomial_features`` and ``probability_threshold``.

    Raises:
        ValueError: If ``output_dir`` is empty or ``None``.
    """
    # NOTE: assignment order is significant. Attributes set before the
    # kwargs loop can be overridden by kwargs; those set after it cannot.
    self.exp = None
    self.input_file = input_file
    self.target_col = target_col
    self.output_dir = output_dir
    self.task_type = task_type
    self.random_seed = random_seed
    self.data = self.target = self.best_model = None
    self.results = self.tuning_results = None
    self.features_name = self.plot_feature_names = None
    self.plots = {}
    self.explainer_plots = {}
    self.plots_explainer_html = None
    self.trees = []

    # Keep a private copy of the caller's options and mirror each one onto
    # the instance.
    extras = dict(kwargs)
    self.user_kwargs = extras
    for option, setting in extras.items():
        setattr(self, option, setting)

    if not hasattr(self, "plot_feature_limit"):
        self.plot_feature_limit = 30

    self._shap_row_cap = None
    if getattr(self, "polynomial_features", False):
        # Keep feature importance responsive by trimming plots/SHAP rows.
        try:
            requested_limit = int(self.plot_feature_limit)
        except (TypeError, ValueError):
            # Unparseable limit: fall back to the default before capping.
            requested_limit = 30
        self.plot_feature_limit = min(requested_limit, 15)
        self._shap_row_cap = 200
        LOG.info(
            "Polynomial features enabled; limiting feature plots to %s and SHAP rows to %s",
            self.plot_feature_limit,
            self._shap_row_cap,
        )

    self.imputed_training_data = self._best_model_metric_used = None
    self.setup_params = {}
    self.test_file = test_file
    self.test_data = None

    if not self.output_dir:
        raise ValueError("output_dir must be specified and not None")

    # Warn about irrelevant kwargs for the task type.
    if self.task_type == "regression":
        if "probability_threshold" in self.user_kwargs:
            LOG.warning(
                "probability_threshold is ignored for regression tasks."
            )

    LOG.info(f"Model kwargs: {vars(self)}")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
102
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
def load_data(self):
    """Read the training table (and the optional test table) into memory.

    Populates ``self.data``, ``self.target``, ``self.features_name`` and
    ``self.plot_feature_names``; when ``self.test_file`` is set, also
    populates ``self.test_data``. Dots in column names are replaced with
    underscores in both tables.

    A ``prediction_label`` column is dropped from the training data unless
    it is the selected target.

    Raises:
        ValueError: If ``self.target_col`` (a 1-based column number) is
            outside ``1..number_of_columns``, or cannot be parsed by
            ``int``.
    """

    def read_table(path):
        # sep=None with the python engine lets pandas detect the delimiter.
        frame = pd.read_csv(path, sep=None, engine="python")
        # regex=False: the dot must be matched literally. The default for
        # ``regex`` differs between pandas versions, so state it explicitly.
        frame.columns = frame.columns.str.replace(".", "_", regex=False)
        return frame

    LOG.info(f"Loading data from {self.input_file}")
    self.data = read_table(self.input_file)

    names = self.data.columns.to_list()
    LOG.info(f"Original dataset columns: {names}")

    # target_col is 1-based; resolve it to a name before any column is
    # dropped so later positional shifts cannot change the target.
    target_index = int(self.target_col) - 1
    num_cols = len(names)
    if not 0 <= target_index < num_cols:
        raise ValueError(
            f"Target column number {self.target_col} is invalid. "
            f"Please select a number between 1 and {num_cols}."
        )

    self.target = names[target_index]

    if self.target == "prediction_label":
        LOG.warning(
            "Using 'prediction_label' as target column. "
            "This may not be intended if it's a previous prediction."
        )
    elif "prediction_label" in self.data.columns:
        LOG.info(
            "Dropping 'prediction_label' column as it's not the target."
        )
        self.data = self.data.drop(columns=["prediction_label"])

    numeric_cols = self.data.select_dtypes(include=["number"]).columns
    non_numeric_cols = self.data.select_dtypes(exclude=["number"]).columns
    self.data[numeric_cols] = self.data[numeric_cols].apply(
        pd.to_numeric, errors="coerce"
    )
    if len(non_numeric_cols) > 0:
        LOG.info(
            f"Non-numeric columns found: {non_numeric_cols.tolist()}"
        )

    # Update names after possible drop.
    names = self.data.columns.to_list()
    LOG.info(f"Dataset columns after processing: {names}")

    self.features_name = [n for n in names if n != self.target]
    self.plot_feature_names = self._select_plot_features(self.features_name)

    if self.test_file:
        LOG.info(f"Loading test data from {self.test_file}")
        # NOTE(review): unlike the training data, a 'prediction_label'
        # column is not dropped from the test table here - confirm that
        # downstream code expects that.
        self.test_data = read_table(self.test_file)
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
167
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
def _select_plot_features(self, all_features):
    """
    Choose the features for which feature-level plots are produced.

    The cap is read from ``self.plot_feature_limit`` (30 when the attribute
    is absent). A cap that is not a positive ``int`` disables limiting.
    When limiting applies, the numeric columns of ``self.data`` are ranked
    by variance, highest first; if that yields fewer than ``limit`` names,
    the remainder is filled from ``all_features`` in the order given.

    Args:
        all_features: Column names of ``self.data`` to choose from.

    Returns:
        ``all_features`` itself when no limiting is needed, otherwise a new
        list holding at most ``limit`` names.
    """
    limit = getattr(self, "plot_feature_limit", 30)
    if not isinstance(limit, int) or limit <= 0:
        LOG.info(
            "Feature plotting limit disabled (plot_feature_limit=%s).", limit
        )
        return all_features
    if len(all_features) <= limit:
        LOG.info(
            "Feature plotting limit not needed (%s features <= limit %s).",
            len(all_features),
            limit,
        )
        return all_features

    # Only numeric columns can be ranked by variance. Selecting them
    # directly avoids an extra deep copy of the whole feature frame, which
    # is wide by definition once this code path is reached.
    numeric = self.data[all_features].select_dtypes(include=["number"])
    ranked = []
    if numeric.shape[1] > 0:
        ranked = (
            numeric.var()
            .fillna(0)  # columns whose variance is undefined rank last
            .abs()
            .sort_values(ascending=False)
            .index.tolist()
        )

    selected = ranked[:limit]
    if len(selected) < limit:
        # Top up with the remaining features in their original order; the
        # set keeps the duplicate check O(1) per candidate.
        already_chosen = set(selected)
        for col in all_features:
            if col in already_chosen:
                continue
            selected.append(col)
            already_chosen.add(col)
            if len(selected) >= limit:
                break
    LOG.info(
        "Limiting feature-level plots to %s of %s available features (limit=%s).",
        len(selected),
        len(all_features),
        limit,
    )
    return selected
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
213
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
def setup_pycaret(self):
    """
    Assemble the PyCaret ``setup`` arguments and initialise the experiment.

    ``self.setup_params`` is built from fixed defaults plus every optional
    attribute that is set (not ``None``) on the instance. An experiment
    object matching ``self.task_type`` is then created, ``setup`` is run on
    ``self.data``, the processed training data is cached, and finally
    ``self.user_kwargs`` is merged into ``self.setup_params``.

    Raises:
        ValueError: If ``self.task_type`` is neither ``"classification"``
            nor ``"regression"``.
    """
    LOG.info("Initializing PyCaret")

    params = {
        "target": self.target,
        "session_id": self.random_seed,
        "html": True,
        "log_experiment": False,
        "system_log": False,
        "index": False,
    }
    self.setup_params = params

    if self.test_data is not None:
        params["test_data"] = self.test_data

    # Optional settings are forwarded only when they were actually set.
    optional_attrs = (
        "train_size",
        "normalize",
        "feature_selection",
        "remove_outliers",
        "remove_multicollinearity",
        "polynomial_features",
        "feature_interaction",
        "feature_ratio",
        "fix_imbalance",
        "n_jobs",
    )
    candidates = ((name, getattr(self, name, None)) for name in optional_attrs)
    params.update(
        {name: value for name, value in candidates if value is not None}
    )

    folds = getattr(self, "cross_validation_folds", None)
    if folds is not None:
        params["fold"] = folds
    LOG.info(self.setup_params)

    # Validate after the parameters have been stored and logged, then
    # import lazily so only the needed PyCaret module is loaded.
    if self.task_type not in ("classification", "regression"):
        raise ValueError(
            "task_type must be 'classification' or 'regression'"
        )
    if self.task_type == "classification":
        from pycaret.classification import (
            ClassificationExperiment as experiment_cls,
        )
    else:
        from pycaret.regression import RegressionExperiment as experiment_cls

    self.exp = experiment_cls()
    self.exp.setup(self.data, **self.setup_params)
    self._capture_imputed_training_data()
    # user_kwargs are merged only after setup() has run: they end up in
    # setup_params but are not passed to setup() by this method.
    self.setup_params.update(self.user_kwargs)
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
261
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
def _capture_imputed_training_data(self):
    """
    Store PyCaret's transformed dataset on ``self.imputed_training_data``.

    Reads ``X_transformed`` and ``y`` from the experiment config, joins
    them column-wise with the target named ``self.target``, and keeps the
    result so later steps (e.g., feature importance) can reuse it. Does
    nothing when no experiment exists; on any failure a warning is logged
    and ``self.imputed_training_data`` is set to ``None``.
    """
    experiment = self.exp
    if experiment is None:
        return
    try:
        features = experiment.get_config("X_transformed").copy()
        raw_target = experiment.get_config("y")
        target_col = (
            raw_target.reset_index(drop=True)
            if isinstance(raw_target, pd.Series)
            else pd.Series(raw_target)
        )
        target_col.name = self.target
        # NOTE(review): features and target are paired purely by position
        # after the index reset -- confirm both always have the same
        # number of rows.
        combined = pd.concat(
            [features.reset_index(drop=True), target_col], axis=1
        )
        self.imputed_training_data = combined
        row_count, column_count = combined.shape
        LOG.info(
            "Captured imputed training dataset from PyCaret "
            "(%s rows, %s features).",
            row_count,
            column_count - 1,  # the target column is not a feature
        )
    except Exception as exc:
        LOG.warning(
            "Unable to capture processed training data from PyCaret: %s",
            exc,
        )
        self.imputed_training_data = None
13
f07850192bc2 planemo upload for repository https://github.com/goeckslab/gleam commit 84d5cd0b1fa5c1ff0ad892bc39c95dad1ceb4920
goeckslab
parents: 10
diff changeset
294
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
295 def train_model(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
296 LOG.info("Training and selecting the best model")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
297 if self.task_type == "classification":
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
298 self.exp.add_metric(
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
299 id="PR-AUC-Weighted",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
300 name="PR-AUC-Weighted",
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
301 target="pred_proba",
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
302 score_func=average_precision_score,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
303 average="weighted",
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
304 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
305 # Build arguments for compare_models()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
306 compare_kwargs = {}
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
307 if getattr(self, "models", None):
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
308 compare_kwargs["include"] = self.models
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
309
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
310 # Respect explicit cross-validation flag
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
311 if getattr(self, "cross_validation", None) is not None:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
312 compare_kwargs["cross_validation"] = self.cross_validation
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
313
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
314 # Respect explicit fold count
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
315 if getattr(self, "cross_validation_folds", None) is not None:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
316 compare_kwargs["fold"] = self.cross_validation_folds
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
317
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
318 best_metric = getattr(self, "best_model_metric", None)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
319 if best_metric:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
320 compare_kwargs["sort"] = best_metric
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
321 self._best_model_metric_used = best_metric
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
322 LOG.info(f"Ranking models using metric: {best_metric}")
13
f07850192bc2 planemo upload for repository https://github.com/goeckslab/gleam commit 84d5cd0b1fa5c1ff0ad892bc39c95dad1ceb4920
goeckslab
parents: 10
diff changeset
323
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
324 LOG.info(f"compare_models kwargs: {compare_kwargs}")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
325 self.best_model = self.exp.compare_models(**compare_kwargs)
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
326 if self._best_model_metric_used is None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
327 self._best_model_metric_used = getattr(self.exp, "_fold_metric", None)
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
328 self.results = self.exp.pull()
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
329 if getattr(self, "tune_model", False):
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
330 LOG.info("Tuning hyperparameters of the best model")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
331 self.best_model = self.exp.tune_model(self.best_model)
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
332 self.tuning_results = self.exp.pull()
7
f4cb41f458fd planemo upload for repository https://github.com/goeckslab/gleam commit b430f8b466655878c3bf63b053655fdbf039ddb0
goeckslab
parents: 6
diff changeset
333
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
334 if self.task_type == "classification":
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
335 self.results.rename(columns={"AUC": "ROC-AUC"}, inplace=True)
9
c6c1f8777aae planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 8
diff changeset
336
c6c1f8777aae planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 8
diff changeset
337 prob_thresh = getattr(self, "probability_threshold", None)
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
338 if self.task_type == "classification" and (
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
339 prob_thresh is not None
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
340 ):
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
341 _ = self.exp.predict_model(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
342 self.best_model, probability_threshold=prob_thresh
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
343 )
9
c6c1f8777aae planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 8
diff changeset
344 else:
c6c1f8777aae planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 8
diff changeset
345 _ = self.exp.predict_model(self.best_model)
c6c1f8777aae planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 8
diff changeset
346
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
347 self.test_result_df = self.exp.pull()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
348 if self.task_type == "classification":
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
349 self.test_result_df.rename(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
350 columns={"AUC": "ROC-AUC"}, inplace=True
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
351 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
352
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
def save_model(self):
    """Persist ``self.best_model`` into ``<output_dir>/pycaret_model.h5``.

    The model is serialized with joblib and the resulting bytes are stored
    as a single opaque HDF5 dataset named ``"model"``.

    Raises:
        Whatever ``joblib.dump`` or ``h5py`` raise on serialization or
        file-system errors; nothing is swallowed here.
    """
    hdf5_path = Path(self.output_dir) / "pycaret_model.h5"

    # Serialize first, inside a temporary directory that is removed on exit.
    # A bare NamedTemporaryFile(delete=False) would leave the dumped model
    # on disk forever.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = Path(tmp_dir) / "model.joblib"
        joblib.dump(self.best_model, str(tmp_path))
        model_bytes = tmp_path.read_bytes()

    # Only open (and truncate) the output file once the bytes are in hand,
    # so a failed dump does not leave an empty HDF5 file behind.
    with h5py.File(hdf5_path, "w") as f:
        # np.void wraps the raw bytes so they are stored as opaque data.
        f.create_dataset("model", data=np.void(model_bytes))
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
361
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
def generate_plots(self):
    """Render diagnostic plots for ``self.best_model`` and save them as PNGs.

    The set of plot names depends on ``self.task_type``: one list for
    ``"classification"``, another for everything else. Each plot is
    requested from ``self.exp.plot_model``, written to
    ``<output_dir>/plot_<name>.png`` and its path is recorded in
    ``self.plots[name]``.

    Plotting is best-effort: a failure for one plot is logged as a warning
    and the remaining plots are still attempted.
    """
    LOG.info("Generating PyCaret diagnostic plots")

    # choose the right plots based on task type
    if self.task_type == "classification":
        plot_names = [
            "learning",
            "vc",
            "calibration",
            "dimension",
            "manifold",
            "rfe",
            "threshold",
            "percentage_above_below",
            "class_report",
            "pr_auc",
            "roc_auc",
        ]
    else:
        plot_names = ["residuals", "vc", "parameter", "error", "learning"]

    for name in plot_names:
        try:
            ax = self.exp.plot_model(self.best_model, plot=name, save=False)
            # NOTE(review): this code expects plot_model to hand back an
            # axes-like object exposing get_figure(); confirm against the
            # PyCaret version in use. Guard explicitly so the warning is
            # readable instead of a bare AttributeError message.
            if ax is None:
                LOG.warning(
                    "Could not generate %s plot: plot_model returned no axes object",
                    name,
                )
                continue
            out_path = Path(self.output_dir) / f"plot_{name}.png"
            fig = ax.get_figure()
            fig.savefig(out_path, bbox_inches="tight")
            self.plots[name] = str(out_path)
        except Exception as e:
            # Deliberate best-effort: not every plot type is supported by
            # every estimator, so log and move on to the next one.
            LOG.warning("Could not generate %s plot: %s", name, e)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
394
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
def encode_image_to_base64(self, img_path: str) -> str:
    """Return the contents of the file at *img_path* as a base64 text string.

    The file is read in binary mode; the base64 output is decoded as UTF-8
    so the result can be embedded directly in text such as HTML.
    """
    with open(img_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
398
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
399 def _build_dataset_overview(self):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
400 """
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
401 Build an HTML table showing label counts with labels as rows and splits
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
402 (Train / Validation / Test) as columns. Each cell shows count and
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
403 percentage of that split. Returns empty string for regression or when
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
404 no label data is available.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
405 """
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
406 if self.task_type != "classification":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
407 return ""
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
408
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
409 def _safe_series(obj):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
410 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
411 return pd.Series(obj).reset_index(drop=True)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
412 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
413 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
414
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
415 def _get_from_config(keys):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
416 if self.exp is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
417 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
418 for key in keys:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
419 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
420 val = self.exp.get_config(key)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
421 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
422 val = getattr(self.exp, key, None)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
423 if val is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
424 return val
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
425 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
426
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
427 # Prefer PyCaret-configured splits; fall back to raw inputs.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
428 X_train = _get_from_config(["X_train_transformed", "X_train"])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
429 y_train = _get_from_config(["y_train_transformed", "y_train"])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
430 y_test_cfg = _get_from_config(["y_test_transformed", "y_test"])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
431
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
432 if y_train is None and self.data is not None and self.target in self.data.columns:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
433 y_train = self.data[self.target]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
434
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
435 y_train_series = _safe_series(y_train)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
436
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
437 # Build a cross-validation generator to derive a validation subset size.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
438 cv_gen = self._get_cv_generator(y_train_series)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
439 y_train_fold = y_train_series
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
440 y_val_fold = None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
441 if cv_gen is not None and y_train_series is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
442 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
443 # Use the first fold to approximate Train/Validation split sizes.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
444 splitter = cv_gen.split(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
445 pd.DataFrame(X_train).reset_index(drop=True)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
446 if X_train is not None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
447 else y_train_series,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
448 y_train_series,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
449 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
450 train_idx, val_idx = next(iter(splitter))
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
451 y_train_fold = y_train_series.iloc[train_idx].reset_index(drop=True)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
452 y_val_fold = y_train_series.iloc[val_idx].reset_index(drop=True)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
453 except Exception as exc:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
454 LOG.warning("Could not derive validation split for dataset overview: %s", exc)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
455
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
456 # Test labels: prefer PyCaret transformed holdout (single file) or external test.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
457 if self.test_data is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
458 if y_test_cfg is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
459 y_test = y_test_cfg
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
460 elif self.target in self.test_data.columns:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
461 y_test = self.test_data[self.target]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
462 else:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
463 y_test = None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
464 else:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
465 y_test = y_test_cfg
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
466
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
467 split_map = {
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
468 "Train": _safe_series(y_train_fold),
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
469 "Validation": _safe_series(y_val_fold),
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
470 "Test": _safe_series(y_test),
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
471 }
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
472 available = {k: v for k, v in split_map.items() if v is not None and not v.empty}
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
473 if not available:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
474 return ""
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
475
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
476 # Collect all labels across available splits (including NaN)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
477 label_pool = pd.concat(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
478 available.values(), ignore_index=True
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
479 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
480 labels = pd.unique(label_pool)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
481
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
482 def _count_for_label(series, label):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
483 if series is None or series.empty:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
484 return None, None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
485 total = len(series)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
486 if pd.isna(label):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
487 cnt = series.isna().sum()
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
488 else:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
489 cnt = (series == label).sum()
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
490 return int(cnt), total
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
491
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
492 rows = []
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
493 for label in labels:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
494 row = ["NaN" if pd.isna(label) else str(label)]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
495 for split_name in ["Train", "Validation", "Test"]:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
496 cnt, total = _count_for_label(split_map.get(split_name), label)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
497 if cnt is None or total is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
498 cell = "—"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
499 else:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
500 pct = (cnt / total * 100) if total else 0
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
501 cell = f"{cnt} ({pct:.1f}%)"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
502 row.append(cell)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
503 rows.append(row)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
504
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
505 df = pd.DataFrame(rows, columns=["Label", "Train", "Validation", "Test"])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
506 df.sort_values("Label", inplace=True)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
507
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
508 return (
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
509 "<h2>Dataset Overview</h2>"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
510 + '<div class="table-wrapper">'
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
511 + df.to_html(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
512 index=False,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
513 classes=["table", "sortable", "table-dataset-overview"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
514 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
515 + "</div>"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
516 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
517
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
def _predict_with_thresholds(self, X, y_true):
    """
    Score one data split with ``self.best_model``.

    The positive class is label ``1`` when the known classes contain it,
    otherwise the second known class (or the only one). When
    ``self.probability_threshold`` is set, the experiment is not flagged
    multiclass, and ``predict_proba`` yields a 2-D matrix with more than
    one column, predictions come from thresholding the positive-class
    column; in every other case ``best_model.predict`` is used.

    Returns ``None`` when either input is ``None`` or when even the
    fallback ``predict`` call fails. Otherwise returns a dict with keys
    ``y_true`` and ``y_pred`` (index-reset Series), ``y_scores``
    (1-D positive-class scores or ``None``), ``pos_label`` and
    ``neg_label``.
    """
    if X is None or y_true is None:
        return None

    model = self.best_model
    truth = pd.Series(y_true).reset_index(drop=True)

    # Prefer the label set recorded on the fitted estimator; otherwise
    # fall back to whatever labels occur in the ground truth.
    labels = list(getattr(model, "classes_", []))
    if not labels:
        try:
            labels = pd.unique(truth).tolist()
        except Exception:
            labels = []

    # Positive class: label 1 if it is a known class, else the second class.
    pos_idx = 0
    if len(labels) > 1:
        try:
            pos_idx = labels.index(1)
        except Exception:
            pos_idx = 1
    pos_label = labels[pos_idx] if labels else 1

    # Negative class: first known label that differs from the positive one.
    others = [c for c in labels if c != pos_label] if len(labels) >= 2 else []
    neg_label = others[0] if others else None

    prob_thresh = getattr(self, "probability_threshold", None)

    try:
        raw_proba = model.predict_proba(X)
        y_scores = None if raw_proba is None else np.asarray(raw_proba)
    except Exception:
        # Estimators without usable probabilities simply yield no scores.
        y_scores = None

    def _is_binary_score_matrix(scores):
        # self.exp is read lazily so that a lookup failure is handled by
        # the caller's try/except rather than raised up front.
        if getattr(self.exp, "is_multiclass", False):
            return False
        return scores is not None and scores.ndim == 2 and scores.shape[1] > 1

    try:
        if prob_thresh is not None and _is_binary_score_matrix(y_scores):
            pos_idx = min(pos_idx, y_scores.shape[1] - 1)
            neg_idx = 1 - pos_idx
            if neg_label is None and len(labels) > neg_idx:
                neg_label = labels[neg_idx]
            positive_scores = y_scores[:, pos_idx]
            fallback_negative = 0 if neg_label is None else neg_label
            y_pred = np.where(
                positive_scores >= prob_thresh, pos_label, fallback_negative
            )
            y_scores = positive_scores
        else:
            y_pred = model.predict(X)
            if _is_binary_score_matrix(y_scores):
                pos_idx = min(pos_idx, y_scores.shape[1] - 1)
                y_scores = y_scores[:, pos_idx]
    except Exception as exc:
        LOG.warning(
            "Falling back to raw predict while computing performance summary: %s",
            exc,
        )
        try:
            y_pred = model.predict(X)
        except Exception as exc_inner:
            LOG.warning(
                "Unable to score split after fallback prediction: %s",
                exc_inner,
            )
            return None
        # Scores from the failed attempt are not trusted.
        y_scores = None

    predicted = pd.Series(y_pred).reset_index(drop=True)

    if y_scores is not None:
        y_scores = np.asarray(y_scores)
        single_column = y_scores.ndim > 1 and y_scores.shape[1] == 1
        if single_column:
            y_scores = y_scores.ravel()
        if getattr(self.exp, "is_multiclass", False) and y_scores.ndim > 1:
            # Avoid passing multiclass score matrices to ROC/PR utilities
            y_scores = None

    return {
        "y_true": truth,
        "y_pred": predicted,
        "y_scores": y_scores,
        "pos_label": pos_label,
        "neg_label": neg_label,
    }
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
621
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
622 def _get_cv_generator(self, y_series):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
623 """
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
624 Build a cross-validation splitter that mirrors the experiment's
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
625 configuration. Returns None when CV is disabled or not applicable.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
626 """
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
627 if self.task_type != "classification":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
628 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
629
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
630 if getattr(self, "cross_validation", None) is False:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
631 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
632
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
633 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
634 cfg_gen = self.exp.get_config("fold_generator")
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
635 if cfg_gen is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
636 return cfg_gen
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
637 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
638 cfg_gen = None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
639
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
640 folds = (
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
641 getattr(self, "cross_validation_folds", None)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
642 or self.setup_params.get("fold")
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
643 or getattr(self.exp, "fold", None)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
644 or 10
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
645 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
646 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
647 folds = int(folds)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
648 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
649 folds = 10
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
650
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
651 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
652 y_series = pd.Series(y_series).reset_index(drop=True)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
653 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
654 y_series = None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
655 if y_series is None or y_series.empty:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
656 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
657
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
658 if folds < 2:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
659 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
660 if len(y_series) < folds:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
661 folds = len(y_series)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
662 if folds < 2:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
663 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
664
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
665 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
666 from sklearn.model_selection import KFold, StratifiedKFold
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
667
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
668 if self.task_type == "classification":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
669 return StratifiedKFold(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
670 n_splits=folds,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
671 shuffle=True,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
672 random_state=self.random_seed,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
673 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
674 return KFold(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
675 n_splits=folds,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
676 shuffle=True,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
677 random_state=self.random_seed,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
678 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
679 except Exception as exc:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
680 LOG.warning("Could not build CV generator: %s", exc)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
681 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
682
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
def _get_cross_validated_predictions(self, X, y):
    """
    Generate cross-validated predictions for the validation split so we
    can report validation metrics for the selected best model.

    Args:
        X: Feature data; anything ``pd.DataFrame`` accepts. If its row count
            differs from ``y`` it is cut to the first ``len(y)`` rows.
        y: Target labels; anything ``pd.Series`` accepts.

    Returns:
        ``None`` when the task is not classification, ``cross_validation``
        is explicitly ``False``, ``X`` or ``y`` is ``None``, ``y`` is empty,
        scikit-learn cannot be imported, no CV splitter could be built, or
        the label predictions could not be computed. Otherwise a dict with:

        * ``y_true``: ``y`` as a Series with a fresh 0..n-1 index.
        * ``y_pred``: cross-validated labels as a Series. For a binary task
          with ``probability_threshold`` set they are obtained by
          thresholding the positive-class probability.
        * ``y_scores``: positive-class probabilities (1-D) for a binary
          task when a probability matrix with more than one column was
          obtained; ``None`` for multiclass tasks or when probabilities
          could not be computed.
        * ``pos_label`` / ``neg_label``: labels treated as positive and
          negative (``neg_label`` is ``None`` if fewer than two classes
          are known).
    """
    if self.task_type != "classification":
        return None
    # Only an explicit ``False`` disables CV; a missing attribute does not.
    if getattr(self, "cross_validation", None) is False:
        return None
    if X is None or y is None:
        return None

    # Best-effort feature: a broken/missing scikit-learn must not abort the
    # report, so any import-time failure is logged and swallowed.
    try:
        from sklearn.model_selection import cross_val_predict
    except Exception as exc:
        LOG.warning("cross_val_predict unavailable: %s", exc)
        return None

    y_series = pd.Series(y).reset_index(drop=True)
    if y_series.empty:
        return None

    cv_gen = self._get_cv_generator(y_series)
    if cv_gen is None:
        return None

    X_df = pd.DataFrame(X).reset_index(drop=True)
    if len(X_df) != len(y_series):
        X_df = X_df.iloc[: len(y_series)].reset_index(drop=True)

    classes = list(getattr(self.best_model, "classes_", []))
    if not classes:
        # The model does not expose ``classes_``. cross_val_predict orders
        # predict_proba columns by the sorted labels, so derive the class
        # list from ``y``; otherwise the default positive label (1) would
        # be paired with probability column 0.
        try:
            classes = sorted(y_series.dropna().unique().tolist())
        except TypeError:
            # Labels of mixed, unorderable types: keep the legacy defaults.
            classes = []

    # Positive class: label ``1`` when present, else the second class.
    if len(classes) > 1:
        pos_idx = classes.index(1) if 1 in classes else 1
    else:
        pos_idx = 0
    pos_label = classes[pos_idx] if classes else 1

    neg_label = None
    if len(classes) >= 2:
        neg_label = next((c for c in classes if c != pos_label), None)

    prob_thresh = getattr(self, "probability_threshold", None)
    n_jobs = getattr(self, "n_jobs", None)
    is_multiclass = getattr(self.exp, "is_multiclass", False)

    # Probabilities are only collected for binary problems.
    y_scores = None
    if not is_multiclass:
        try:
            y_scores = np.asarray(
                cross_val_predict(
                    self.best_model,
                    X_df,
                    y_series,
                    cv=cv_gen,
                    method="predict_proba",
                    n_jobs=n_jobs,
                )
            )
        except Exception as exc:
            # Not every estimator implements predict_proba; fall back to
            # plain label predictions below.
            LOG.debug("Could not compute CV probabilities: %s", exc)

    has_class_columns = (
        y_scores is not None
        and y_scores.ndim == 2
        and y_scores.shape[1] > 1
    )
    if has_class_columns:
        pos_idx = min(pos_idx, y_scores.shape[1] - 1)

    if prob_thresh is not None and has_class_columns:
        # Apply the user-supplied threshold to the positive-class column.
        y_pred = np.where(
            y_scores[:, pos_idx] >= prob_thresh,
            pos_label,
            neg_label if neg_label is not None else 0,
        )
    else:
        try:
            y_pred = cross_val_predict(
                self.best_model,
                X_df,
                y_series,
                cv=cv_gen,
                method="predict",
                n_jobs=n_jobs,
            )
        except Exception as exc:
            LOG.warning(
                "Could not compute cross-validated predictions: %s",
                exc,
            )
            return None

    if has_class_columns:
        # Report only the positive-class probability.
        y_scores = y_scores[:, pos_idx]

    return {
        "y_true": y_series,
        "y_pred": pd.Series(y_pred).reset_index(drop=True),
        "y_scores": y_scores,
        "pos_label": pos_label,
        "neg_label": neg_label,
    }
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
802
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
803 def _get_split_predictions_for_report(self):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
804 """
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
805 Collect predictions/probabilities for Train/Validation/Test splits so the
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
806 performance table can show consistent metrics across splits.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
807 """
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
808 if self.task_type != "classification":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
809 return {}
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
810
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
811 def _get_from_config(keys):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
812 for key in keys:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
813 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
814 val = self.exp.get_config(key)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
815 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
816 val = getattr(self.exp, key, None)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
817 if val is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
818 return val
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
819 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
820
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
821 X_train = _get_from_config(["X_train_transformed", "X_train"])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
822 y_train = _get_from_config(["y_train_transformed", "y_train"])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
823 X_holdout = _get_from_config(["X_test_transformed", "X_test"])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
824 y_holdout = _get_from_config(["y_test_transformed", "y_test"])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
825
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
826 predictions = {}
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
827
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
828 # Train metrics (best model on training data)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
829 if X_train is not None and y_train is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
830 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
831 train_preds = self._predict_with_thresholds(X_train, y_train)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
832 if train_preds is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
833 predictions["Train"] = train_preds
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
834 except Exception as exc:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
835 LOG.warning(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
836 "Could not score Train split for performance summary: %s",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
837 exc,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
838 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
839
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
840 # Validation metrics via cross-validation on training data
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
841 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
842 val_preds = self._get_cross_validated_predictions(X_train, y_train)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
843 if val_preds is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
844 predictions["Validation"] = val_preds
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
845 except Exception as exc:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
846 LOG.warning(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
847 "Could not score Validation split for performance summary: %s",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
848 exc,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
849 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
850
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
851 # Test metrics (holdout from single file, or provided test file)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
852 X_test = X_holdout
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
853 y_test = y_holdout
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
854 if (X_test is None or y_test is None) and self.test_data is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
855 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
856 X_test = self.test_data.drop(columns=[self.target])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
857 y_test = self.test_data[self.target]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
858 except Exception as exc:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
859 LOG.warning(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
860 "Could not prepare external test data for performance summary: %s",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
861 exc,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
862 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
863
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
864 if X_test is not None and y_test is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
865 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
866 test_preds = self._predict_with_thresholds(X_test, y_test)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
867 if test_preds is not None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
868 predictions["Test"] = test_preds
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
869 except Exception as exc:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
870 LOG.warning(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
871 "Could not score Test split for performance summary: %s",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
872 exc,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
873 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
874 return predictions
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
875
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
876 def _compute_metric_value(self, metric_name, preds, split_name):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
877 """
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
878 Compute a single metric for a given split prediction bundle.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
879 """
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
880 if preds is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
881 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
882
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
883 y_true = preds["y_true"]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
884 y_pred = preds["y_pred"]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
885 y_scores = preds.get("y_scores")
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
886 pos_label = preds.get("pos_label")
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
887 neg_label = preds.get("neg_label")
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
888 is_multiclass = getattr(self.exp, "is_multiclass", False)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
889
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
890 def _format_binary_labels(series):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
891 if pos_label is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
892 return series
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
893 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
894 return (series == pos_label).astype(int)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
895 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
896 return series
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
897
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
898 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
899 if metric_name == "Accuracy":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
900 return accuracy_score(y_true, y_pred)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
901 if metric_name == "ROC-AUC":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
902 if y_scores is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
903 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
904 y_true_bin = _format_binary_labels(y_true)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
905 if len(pd.unique(y_true_bin)) < 2:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
906 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
907 return roc_auc_score(y_true_bin, y_scores)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
908 if metric_name == "Precision":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
909 if is_multiclass:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
910 return precision_score(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
911 y_true, y_pred, average="weighted", zero_division=0
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
912 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
913 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
914 return precision_score(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
915 y_true, y_pred, pos_label=pos_label, zero_division=0
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
916 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
917 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
918 return precision_score(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
919 y_true, y_pred, average="weighted", zero_division=0
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
920 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
921 if metric_name == "Recall":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
922 if is_multiclass:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
923 return recall_score(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
924 y_true, y_pred, average="weighted", zero_division=0
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
925 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
926 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
927 return recall_score(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
928 y_true, y_pred, pos_label=pos_label, zero_division=0
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
929 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
930 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
931 return recall_score(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
932 y_true, y_pred, average="weighted", zero_division=0
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
933 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
934 if metric_name == "F1-Score":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
935 if is_multiclass:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
936 return f1_score(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
937 y_true, y_pred, average="weighted", zero_division=0
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
938 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
939 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
940 return f1_score(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
941 y_true, y_pred, pos_label=pos_label, zero_division=0
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
942 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
943 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
944 return f1_score(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
945 y_true, y_pred, average="weighted", zero_division=0
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
946 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
947 if metric_name == "PR-AUC":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
948 if y_scores is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
949 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
950 y_true_bin = _format_binary_labels(y_true)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
951 if len(pd.unique(y_true_bin)) < 2:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
952 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
953 return average_precision_score(y_true_bin, y_scores)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
954 if metric_name == "Specificity":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
955 labels = pd.unique(pd.concat([y_true, y_pred], ignore_index=True))
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
956 if len(labels) != 2:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
957 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
958 if pos_label is None or pos_label not in labels:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
959 pos_label = labels[1]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
960 neg_candidates = [lbl for lbl in labels if lbl != pos_label]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
961 neg_label_final = (
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
962 neg_label if neg_label in labels else (neg_candidates[0] if neg_candidates else None)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
963 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
964 if neg_label_final is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
965 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
966 cm = confusion_matrix(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
967 y_true, y_pred, labels=[neg_label_final, pos_label]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
968 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
969 if cm.shape != (2, 2):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
970 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
971 tn, fp, fn, tp = cm.ravel()
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
972 denom = tn + fp
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
973 return (tn / denom) if denom else None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
974 if metric_name == "MCC":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
975 return matthews_corrcoef(y_true, y_pred)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
976 except Exception as exc:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
977 LOG.warning(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
978 "Could not compute %s for %s split: %s",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
979 metric_name,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
980 split_name,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
981 exc,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
982 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
983 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
984 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
985
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
986 def _build_performance_summary_table(self):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
987 """
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
988 Build a Train/Validation/Test metrics table for classification tasks.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
989 Returns empty string when metrics are unavailable or not applicable.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
990 """
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
991 if self.task_type != "classification":
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
992 return ""
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
993
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
994 split_predictions = self._get_split_predictions_for_report()
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
995 validation_best_row = None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
996 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
997 if isinstance(self.results, pd.DataFrame) and not self.results.empty:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
998 validation_best_row = self.results.iloc[0]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
999 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1000 validation_best_row = None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1001
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1002 if not split_predictions and validation_best_row is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1003 return ""
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1004
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1005 metric_names = [
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1006 "Accuracy",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1007 "ROC-AUC",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1008 "Precision",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1009 "Recall",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1010 "F1-Score",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1011 "PR-AUC",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1012 "Specificity",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1013 "MCC",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1014 ]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1015
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1016 validation_column_map = {
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1017 "Accuracy": ["Accuracy"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1018 "ROC-AUC": ["ROC-AUC", "AUC"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1019 "Precision": ["Precision", "Prec.", "Prec"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1020 "Recall": ["Recall"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1021 "F1-Score": ["F1-Score", "F1"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1022 "PR-AUC": ["PR-AUC", "PR-AUC-Weighted", "PRC"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1023 "Specificity": ["Specificity"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1024 "MCC": ["MCC"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1025 }
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1026
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1027 def _fmt(value):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1028 if value is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1029 return "—"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1030 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1031 if isinstance(value, (float, np.floating)) and (
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1032 np.isnan(value) or np.isinf(value)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1033 ):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1034 return "—"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1035 return f"{value:.3f}"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1036 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1037 return str(value)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1038
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1039 def _validation_metric(metric_name):
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1040 if validation_best_row is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1041 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1042 cols = validation_column_map.get(metric_name, [])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1043 for col in cols:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1044 if col in validation_best_row:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1045 try:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1046 return validation_best_row[col]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1047 except Exception:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1048 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1049 return None
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1050
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1051 rows = []
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1052 for metric in metric_names:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1053 row = [metric]
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1054 # Train
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1055 train_val = self._compute_metric_value(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1056 metric, split_predictions.get("Train"), "Train"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1057 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1058 row.append(_fmt(train_val))
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1059
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1060 # Validation from Train & Validation Summary first row; fallback to computed CV.
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1061 val_val = _validation_metric(metric)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1062 if val_val is None:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1063 val_val = self._compute_metric_value(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1064 metric, split_predictions.get("Validation"), "Validation"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1065 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1066 row.append(_fmt(val_val))
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1067
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1068 # Test
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1069 test_val = self._compute_metric_value(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1070 metric, split_predictions.get("Test"), "Test"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1071 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1072 row.append(_fmt(test_val))
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1073 rows.append(row)
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1074
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1075 df = pd.DataFrame(rows, columns=["Metric", "Train", "Validation", "Test"])
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1076 return (
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1077 "<h2>Model Performance Summary</h2>"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1078 + '<div class="table-wrapper">'
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1079 + df.to_html(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1080 index=False,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1081 classes=["table", "sortable", "table-perf-summary"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1082 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1083 + "</div>"
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1084 )
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1085
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1086 def _resolve_plot_callable(self, key, fig_or_fn, section):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1087 """
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1088 Safely execute stored plot callables so a single failure does not
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1089 abort the entire HTML report generation.
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1090 """
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1091 if fig_or_fn is None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1092 return None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1093 try:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1094 return fig_or_fn() if callable(fig_or_fn) else fig_or_fn
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1095 except Exception as exc:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1096 extra = ""
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1097 if isinstance(exc, ValueError) and "Input contains NaN" in str(exc):
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1098 extra = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1099 " (model returned NaN probabilities; "
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1100 "consider checking data preprocessing)"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1101 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1102 LOG.warning(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1103 "Skipping %s plot '%s' due to error: %s%s",
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1104 section,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1105 key,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1106 exc,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1107 extra,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1108 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1109 return None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1110
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1111 def save_html_report(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1112 LOG.info("Saving HTML report")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1113
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1114 # 1) Determine best model name
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1115 try:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1116 best_model_name = str(self.results.iloc[0]["Model"])
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1117 except Exception:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1118 best_model_name = type(self.best_model).__name__
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1119 LOG.info(f"Best model determined as: {best_model_name}")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1120
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1121 # 2) Compute training sample count
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1122 try:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1123 n_train = self.exp.X_train.shape[0]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1124 except Exception:
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1125 n_train = getattr(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1126 self.exp, "X_train_transformed", pd.DataFrame()
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1127 ).shape[0]
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1128 total_rows = self.data.shape[0]
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
1129
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1130 # 3) Build setup parameters table
9
c6c1f8777aae planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 8
diff changeset
1131 all_params = self.setup_params.copy()
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1132 if self.task_type == "classification" and (
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1133 hasattr(self, "probability_threshold")
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1134 ):
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1135 all_params["probability_threshold"] = (
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1136 self.probability_threshold
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1137 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1138 display_keys = [
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1139 "Target",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1140 "Session ID",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1141 "Train Size",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1142 "Normalize",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1143 "Feature Selection",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1144 "Cross Validation",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1145 "Cross Validation Folds",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1146 "Remove Outliers",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1147 "Remove Multicollinearity",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1148 "Polynomial Features",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1149 "Fix Imbalance",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1150 "Models",
9
c6c1f8777aae planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 8
diff changeset
1151 "Probability Threshold",
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1152 ]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1153 setup_rows = []
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1154 for key in display_keys:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1155 pk = key.lower().replace(" ", "_")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1156 v = all_params.get(pk)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1157 if key == "Train Size":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1158 frac = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1159 float(v)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1160 if v is not None
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1161 else (n_train / total_rows if total_rows else 0)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1162 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1163 dv = f"{frac:.2f} ({n_train} rows)"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1164 elif key in {
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1165 "Normalize",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1166 "Feature Selection",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1167 "Cross Validation",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1168 "Remove Outliers",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1169 "Remove Multicollinearity",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1170 "Polynomial Features",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1171 "Fix Imbalance",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1172 }:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1173 dv = bool(v)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1174 elif key == "Cross Validation Folds":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1175 dv = v if v is not None else "None"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1176 elif key == "Models":
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1177 dv = ", ".join(map(str, v)) if isinstance(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1178 v, (list, tuple)
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1179 ) else "None"
9
c6c1f8777aae planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 8
diff changeset
1180 elif key == "Probability Threshold":
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1181 dv = f"{v:.2f}" if v is not None else "0.5"
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1182 else:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1183 dv = v if v is not None else "None"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1184 setup_rows.append([key, dv])
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1185 metric_label = self._best_model_metric_used or getattr(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1186 self.exp, "_fold_metric", None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1187 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1188 if metric_label:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1189 setup_rows.append(["Best Model Metric", metric_label])
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1190
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1191 df_setup = pd.DataFrame(setup_rows, columns=["Parameter", "Value"])
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1192 df_setup.to_csv(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1193 Path(self.output_dir) / "setup_params.csv", index=False
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1194 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1195
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1196 # 4) Persist CSVs
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1197 self.results.to_csv(
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1198 Path(self.output_dir) / "comparison_results.csv",
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1199 index=False
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1200 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1201 self.test_result_df.to_csv(
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1202 Path(self.output_dir) / "test_results.csv", index=False
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1203 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1204 pd.DataFrame(
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1205 self.best_model.get_params().items(),
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1206 columns=["Parameter", "Value"]
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1207 ).to_csv(Path(self.output_dir) / "best_model.csv", index=False)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1208
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1209 if self.tuning_results is not None:
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1210 self.tuning_results.to_csv(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1211 Path(self.output_dir) / "tuning_results.csv",
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1212 index=False
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1213 )
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1214
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1215 # 5) Header
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1216 header = f"<h2>Best Model: {best_model_name}</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1217
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1218 # — Validation Summary & Configuration —
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1219 val_df = self.results.copy()
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1220 dataset_overview_html = self._build_dataset_overview()
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1221 performance_summary_html = self._build_performance_summary_table()
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1222 # mapping raw plot keys to user-friendly titles
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1223 plot_title_map = {
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1224 "learning": "Learning Curve",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1225 "vc": "Validation Curve",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1226 "calibration": "Calibration Curve",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1227 "dimension": "Dimensionality Reduction",
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1228 "manifold": "t-SNE",
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1229 "rfe": "Recursive Feature Elimination",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1230 "threshold": "Threshold Plot",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1231 "percentage_above_below": "Percentage Above vs. Below Cutoff",
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1232 "class_report": "Per-Class Metrics",
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1233 "pr_auc": "Precision-Recall AUC",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1234 "roc_auc": "Receiver Operating Characteristic AUC",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1235 "residuals": "Residuals Distribution",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1236 "error": "Prediction Error Distribution",
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1237 }
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1238 val_df.drop(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1239 columns=["TT (Ec)", "TT (Sec)"], errors="ignore", inplace=True
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1240 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1241 summary_html = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1242 header
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1243 + "<h2>Train & Validation Summary</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1244 + '<div class="table-wrapper">'
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1245 + val_df.to_html(index=False, classes="table sortable")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1246 + "</div>"
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1247 )
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1248
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1249 if self.tuning_results is not None:
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1250 tuning_df = self.tuning_results.copy()
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1251 tuning_df.drop(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1252 columns=["TT (Sec)"], errors="ignore", inplace=True
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1253 )
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1254 summary_html += (
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1255 f"<h2>{best_model_name}: Tuning Summary</h2>"
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1256 + '<div class="table-wrapper">'
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1257 + tuning_df.to_html(index=False, classes="table sortable")
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1258 + "</div>"
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1259 )
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1260
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1261 config_html = (
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1262 header
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1263 + dataset_overview_html
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1264 + performance_summary_html
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1265 + "<h2>Setup Parameters</h2>"
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1266 + '<div class="table-wrapper">'
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1267 + df_setup.to_html(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1268 index=False,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1269 classes=["table", "sortable", "table-setup-params"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1270 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1271 + "</div>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1272 # — Hyperparameters
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1273 + "<h2>Best Model Hyperparameters</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1274 + '<div class="table-wrapper">'
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1275 + pd.DataFrame(
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1276 self.best_model.get_params().items(),
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1277 columns=["Parameter", "Value"]
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1278 ).to_html(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1279 index=False,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1280 classes=["table", "sortable", "table-hyperparams"],
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1281 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1282 + "</div>"
3
ccd798db5abb planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
goeckslab
parents: 2
diff changeset
1283 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1284
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1285 # choose summary plots based on task type
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1286 if self.task_type == "classification":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1287 summary_plots = [
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1288 "threshold",
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1289 "learning",
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1290 "calibration",
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1291 "rfe",
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1292 "vc",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1293 "dimension",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1294 "manifold",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1295 "percentage_above_below",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1296 ]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1297 else:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1298 summary_plots = ["learning", "vc", "parameter", "residuals"]
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1299
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1300 for name in summary_plots:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1301 if name in self.plots:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1302 summary_html += "<hr>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1303 b64 = encode_image_to_base64(self.plots[name])
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1304 title = plot_title_map.get(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1305 name, name.replace("_", " ").title()
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1306 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1307 summary_html += (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1308 '<div class="plot">'
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1309 f"<h2>{title}</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1310 f'<img src="data:image/png;base64,{b64}" '
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1311 'style="max-width:90%;max-height:600px;'
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1312 'border:1px solid #ddd;"/>'
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1313 "</div>"
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
1314 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1315
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1316 # — Test Summary —
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1317 test_html = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1318 header
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1319 + '<div class="table-wrapper">'
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1320 + self.test_result_df.to_html(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1321 index=False, classes="table sortable"
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1322 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1323 + "</div>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1324 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1325 if self.task_type == "regression":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1326 try:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1327 y_true = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1328 pd.Series(self.exp.y_test_transformed)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1329 .reset_index(drop=True)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1330 .rename("True")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1331 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1332 y_pred = pd.Series(
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1333 self.best_model.predict(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1334 self.exp.X_test_transformed
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1335 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1336 ).rename("Predicted")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1337 df_tp = pd.concat([y_true, y_pred], axis=1)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1338 test_html += "<h2>True vs Predicted Values</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1339 test_html += (
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1340 '<div class="table-wrapper" '
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1341 'style="max-height:400px; overflow-y:auto;">'
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1342 + df_tp.head(50).to_html(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1343 index=False, classes="table sortable"
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1344 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1345 + "</div>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1346 + add_hr_to_html()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1347 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1348 except Exception as e:
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1349 LOG.warning(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1350 f"Could not generate True vs Predicted table: {e}"
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1351 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1352
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1353 # 5a) Explainer-substituted plots in order
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1354 if self.task_type == "regression":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1355 test_order = ["residuals"]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1356 else:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1357 test_order = [
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1358 "confusion_matrix",
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1359 "class_report",
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1360 "roc_auc",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1361 "pr_auc",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1362 "lift_curve",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1363 "cumulative_precision",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1364 ]
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1365 rendered_test_plots = set()
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1366 for key in test_order:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1367 fig_or_fn = self.explainer_plots.pop(key, None)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1368 if fig_or_fn is not None:
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1369 fig = self._resolve_plot_callable(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1370 key, fig_or_fn, section="test/explainer"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1371 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1372 if fig is None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1373 continue
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1374 rendered_test_plots.add(key)
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1375 title = plot_title_map.get(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1376 key, key.replace("_", " ").title()
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1377 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1378 test_html += (
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1379 f"<h2>{title}</h2>" + add_plot_to_html(fig)
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1380 + add_hr_to_html()
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1381 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1382 # 5b) Remaining PyCaret test plots
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1383 for name, path in self.plots.items():
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1384 # classification: include only the small extras, before
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1385 # skipping anything
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1386 if self.task_type == "classification" and (
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1387 name in {
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1388 "pr_auc",
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1389 "class_report",
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1390 }
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1391 ):
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1392 if name in rendered_test_plots:
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1393 continue
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1394 title = plot_title_map.get(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1395 name, name.replace("_", " ").title()
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1396 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1397 b64 = encode_image_to_base64(path)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1398 test_html += (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1399 f"<h2>{title}</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1400 "<div class='plot'>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1401 f"<img src='data:image/png;base64,{b64}' "
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1402 "style='max-width:90%;max-height:600px;"
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1403 "border:1px solid #ddd;'/>"
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1404 "</div>" + add_hr_to_html()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1405 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1406 continue
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1407
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1408 # regression: explicitly include the 'error' plot,
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1409 # before skipping
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1410 if self.task_type == "regression" and (
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1411 name == "error"
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1412 ):
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1413 title = plot_title_map.get(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1414 "error", "Prediction Error Distribution"
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1415 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1416 b64 = encode_image_to_base64(path)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1417 test_html += (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1418 f"<h2>{title}</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1419 "<div class='plot'>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1420 f"<img src='data:image/png;base64,{b64}' "
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1421 "style='max-width:90%;max-height:600px;"
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1422 "border:1px solid #ddd;'/>"
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1423 "</div>" + add_hr_to_html()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1424 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1425 continue
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1426
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1427 # now skip any plots already rendered via test_order
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1428 if name in test_order:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1429 continue
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1430
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1431 # — Feature Importance —
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1432 feature_html = header
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1433
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1434 # 6a) PyCaret’s default feature importances
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1435 imputed_data = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1436 self.imputed_training_data
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1437 if self.imputed_training_data is not None
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1438 else self.data
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1439 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1440 fi_analyzer = FeatureImportanceAnalyzer(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1441 data=imputed_data,
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1442 target_col=self.target_col,
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1443 task_type=self.task_type,
3
ccd798db5abb planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
goeckslab
parents: 2
diff changeset
1444 output_dir=self.output_dir,
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
1445 exp=self.exp,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
1446 best_model=self.best_model,
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1447 max_plot_features=self.plot_feature_limit,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1448 processed_data=self.imputed_training_data,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1449 max_shap_rows=self._shap_row_cap,
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1450 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1451 fi_html = fi_analyzer.run()
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1452 # Add a small table to show SHAP feature caps near the Best Model header.
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1453 cap_rows = []
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1454 if fi_analyzer.shap_total_features is not None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1455 cap_rows.append(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1456 ("Total transformed features", fi_analyzer.shap_total_features)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1457 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1458 if fi_analyzer.shap_used_features is not None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1459 cap_rows.append(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1460 ("Features used in SHAP", fi_analyzer.shap_used_features)
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1461 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1462 if cap_rows:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1463 cap_table = (
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1464 "<div class='table-wrapper'>"
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1465 "<table class='table sortable table-fi-scope'>"
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1466 "<thead><tr><th>Feature Importance Scope</th><th>Count</th></tr></thead>"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1467 "<tbody>"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1468 + "".join(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1469 f"<tr><td>{label}</td><td>{value}</td></tr>"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1470 for label, value in cap_rows
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1471 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1472 + "</tbody></table></div>"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1473 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1474 feature_html += cap_table
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1475 feature_html += fi_html
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1476
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1477 # 6b) Explainer SHAP importances
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1478 for key in ["shap_mean", "shap_perm"]:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1479 fig_or_fn = self.explainer_plots.pop(key, None)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1480 if fig_or_fn is not None:
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1481 fig = self._resolve_plot_callable(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1482 key, fig_or_fn, section="feature importance"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1483 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1484 if fig is None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1485 continue
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1486 # give SHAP plots explicit titles
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1487 title = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1488 "Mean Absolute SHAP Value Impact"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1489 if key == "shap_mean"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1490 else "Permutation Feature Importance"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1491 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1492 feature_html += (
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1493 f"<h2>{title}</h2>" + add_plot_to_html(fig)
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1494 + add_hr_to_html()
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1495 )
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
1496
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1497 # 6c) PDPs last
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1498 pdp_keys = sorted(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1499 k for k in self.explainer_plots if k.startswith("pdp__")
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1500 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1501 for k in pdp_keys:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1502 fig_or_fn = self.explainer_plots[k]
16
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1503 fig = self._resolve_plot_callable(
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1504 k, fig_or_fn, section="pdp"
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1505 )
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1506 if fig is None:
4fee4504646e planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 14
diff changeset
1507 continue
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1508 # extract feature name
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1509 feature = k.split("__", 1)[1]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1510 title = f"Partial Dependence for {feature}"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1511 feature_html += (
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1512 f"<h2>{title}</h2>" + add_plot_to_html(fig)
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1513 + add_hr_to_html()
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
1514 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1515 # 7) Assemble final HTML (three tabs)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1516 html = get_html_template()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1517 html += "<h1>Tabular Learner Model Report</h1>"
17
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1518 html += build_tabbed_html(
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1519 summary_html,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1520 test_html,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1521 feature_html,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1522 explainer_html=None,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1523 config_html=config_html,
c5c324ac29fc planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 16
diff changeset
1524 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1525 html += get_feature_metrics_help_modal()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1526 html += get_html_closing()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1527
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1528 # 8) Write out
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1529 (Path(self.output_dir) / "comparison_result.html").write_text(
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
1530 html, encoding="utf-8"
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
1531 )
10
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1532 LOG.info(
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1533 f"HTML report generated at: "
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1534 f"{self.output_dir}/comparison_result.html"
e2a6fed32d54 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 9
diff changeset
1535 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1536
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
def save_dashboard(self):
    """Placeholder that concrete trainer subclasses are expected to override.

    Raises:
        NotImplementedError: always; this base version has no behavior.
    """
    message = "Subclasses should implement this method"
    raise NotImplementedError(message)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1539
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
def generate_plots_explainer(self):
    """Placeholder that concrete trainer subclasses are expected to override.

    Raises:
        NotImplementedError: always; this base version has no behavior.
    """
    message = "Subclasses should implement this method"
    raise NotImplementedError(message)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1542
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
def generate_tree_plots(self):
    """Append one explainer output per tree of ``self.best_model``.

    Only scikit-learn random forests and XGBoost sklearn-style estimators
    are handled; for any other model type a warning is logged and the
    method returns without touching ``self.trees``.

    Reads ``self.exp.X_test_transformed`` / ``self.exp.y_test_transformed``
    and ``self.best_model``; appends to ``self.trees``.
    """
    # Heavy optional dependencies are imported lazily, inside the method.
    from explainerdashboard.explainers import RandomForestExplainer
    from sklearn.ensemble import (
        RandomForestClassifier, RandomForestRegressor
    )
    from xgboost import XGBClassifier, XGBRegressor

    LOG.info("Generating tree plots")
    # Work on a copy of the test features so the experiment's frame is
    # not shared with the explainer.
    features = self.exp.X_test_transformed.copy()
    target = self.exp.y_test_transformed

    model = self.best_model
    forest_types = (RandomForestClassifier, RandomForestRegressor)
    boosted_types = (XGBClassifier, XGBRegressor)

    if isinstance(model, forest_types):
        tree_count = model.n_estimators
    elif isinstance(model, boosted_types):
        # The booster dump has one entry per tree.
        tree_count = len(model.get_booster().get_dump())
    else:
        LOG.warning("Tree plots not supported for this model type.")
        return

    # NOTE(review): the same explainer class is used for both model
    # families — confirm this is intended for XGBoost models.
    explainer = RandomForestExplainer(model, features, target)
    for tree_idx in range(tree_count):
        # index=0: every tree is rendered for the first test row.
        tree_output = explainer.decisiontree_encoded(
            tree_idx=tree_idx, index=0
        )
        self.trees.append(tree_output)
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1568
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
def run(self):
    """Execute the full training/reporting pipeline, one step at a time.

    Steps run strictly in the order listed below; an exception raised by
    any step propagates and the remaining steps are not executed.
    """
    pipeline_steps = (
        "load_data",
        "setup_pycaret",
        "train_model",
        "save_model",
        "generate_plots",
        "generate_plots_explainer",
        "generate_tree_plots",
        "save_html_report",
        # NOTE: "save_dashboard" is deliberately absent; its call was
        # commented out in the original pipeline.
    )
    for step_name in pipeline_steps:
        # Resolve each step just before calling it, exactly as a direct
        # ``self.<step>()`` call would.
        getattr(self, step_name)()