annotate base_model_trainer.py @ 13:bf0df21a1ea3 draft default tip

planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
author goeckslab
date Sat, 06 Dec 2025 14:20:23 +0000
parents 15707141e7da
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1 import base64
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
2 import logging
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
3 import tempfile
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
4 from pathlib import Path
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
5
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
6 import h5py
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
7 import joblib
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
8 import numpy as np
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
9 import pandas as pd
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
10 from feature_help_modal import get_feature_metrics_help_modal
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
11 from feature_importance import FeatureImportanceAnalyzer
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
12 from sklearn.metrics import (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
13 accuracy_score,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
14 average_precision_score,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
15 confusion_matrix,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
16 f1_score,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
17 matthews_corrcoef,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
18 precision_score,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
19 recall_score,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
20 roc_auc_score,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
21 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
22 from utils import (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
23 add_hr_to_html,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
24 add_plot_to_html,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
25 build_tabbed_html,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
26 encode_image_to_base64,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
27 get_html_closing,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
28 get_html_template,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
29 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
30
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
31 logging.basicConfig(level=logging.DEBUG)
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
32 LOG = logging.getLogger(__name__)
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
33
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
34
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
35 class BaseModelTrainer:
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
36 def __init__(
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
37 self,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
38 input_file,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
39 target_col,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
40 output_dir,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
41 task_type,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
42 random_seed,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
43 test_file=None,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
44 **kwargs,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
45 ):
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
46 self.exp = None
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
47 self.input_file = input_file
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
48 self.target_col = target_col
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
49 self.output_dir = output_dir
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
50 self.task_type = task_type
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
51 self.random_seed = random_seed
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
52 self.data = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
53 self.target = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
54 self.best_model = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
55 self.results = None
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
56 self.tuning_results = None
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
57 self.features_name = None
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
58 self.plot_feature_names = None
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
59 self.plots = {}
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
60 self.explainer_plots = {}
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
61 self.plots_explainer_html = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
62 self.trees = []
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
63 self.user_kwargs = kwargs.copy()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
64 for key, value in self.user_kwargs.items():
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
65 setattr(self, key, value)
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
66 if not hasattr(self, "plot_feature_limit"):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
67 self.plot_feature_limit = 30
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
68 self._shap_row_cap = None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
69 if getattr(self, "polynomial_features", False):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
70 # Keep feature importance responsive by trimming plots/SHAP rows
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
71 try:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
72 limit_val = int(self.plot_feature_limit)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
73 except (TypeError, ValueError):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
74 limit_val = 30
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
75 self.plot_feature_limit = min(limit_val, 15)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
76 self._shap_row_cap = 200
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
77 LOG.info(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
78 "Polynomial features enabled; limiting feature plots to %s and SHAP rows to %s",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
79 self.plot_feature_limit,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
80 self._shap_row_cap,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
81 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
82 self.imputed_training_data = None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
83 self._best_model_metric_used = None
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
84 self.setup_params = {}
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
85 self.test_file = test_file
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
86 self.test_data = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
87
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
88 if not self.output_dir:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
89 raise ValueError(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
90 "output_dir must be specified and not None"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
91 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
92
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
93 # Warn about irrelevant kwargs for the task type
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
94 if self.task_type == "regression" and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
95 "probability_threshold" in self.user_kwargs
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
96 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
97 LOG.warning(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
98 "probability_threshold is ignored for regression tasks."
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
99 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
100
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
101 LOG.info(f"Model kwargs: {self.__dict__}")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
102
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
103 def load_data(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
104 LOG.info(f"Loading data from {self.input_file}")
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
105 self.data = pd.read_csv(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
106 self.input_file, sep=None, engine="python"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
107 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
108 self.data.columns = self.data.columns.str.replace(".", "_")
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
109
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
110 names = self.data.columns.to_list()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
111 LOG.info(f"Original dataset columns: {names}")
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
112
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
113 target_index = int(self.target_col) - 1
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
114 num_cols = len(names)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
115 if target_index < 0 or target_index >= num_cols:
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
116 raise ValueError(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
117 f"Target column number {self.target_col} is invalid. "
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
118 f"Please select a number between 1 and {num_cols}."
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
119 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
120
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
121 self.target = names[target_index]
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
122
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
123 # Conditional drop: only if 'prediction_label' exists and is not
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
124 # the target
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
125 if "prediction_label" in self.data.columns and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
126 self.data.columns[target_index] != "prediction_label"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
127 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
128 LOG.info(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
129 "Dropping 'prediction_label' column as it's not the target."
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
130 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
131 self.data = self.data.drop(columns=["prediction_label"])
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
132 else:
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
133 if self.target == "prediction_label":
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
134 LOG.warning(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
135 "Using 'prediction_label' as target column. "
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
136 "This may not be intended if it's a previous prediction."
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
137 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
138
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
139 numeric_cols = self.data.select_dtypes(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
140 include=["number"]
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
141 ).columns
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
142 non_numeric_cols = self.data.select_dtypes(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
143 exclude=["number"]
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
144 ).columns
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
145 self.data[numeric_cols] = self.data[numeric_cols].apply(
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
146 pd.to_numeric, errors="coerce"
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
147 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
148 if len(non_numeric_cols) > 0:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
149 LOG.info(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
150 f"Non-numeric columns found: {non_numeric_cols.tolist()}"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
151 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
152
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
153 # Update names after possible drop
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
154 names = self.data.columns.to_list()
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
155 LOG.info(f"Dataset columns after processing: {names}")
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
156
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
157 self.features_name = [n for n in names if n != self.target]
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
158 self.plot_feature_names = self._select_plot_features(self.features_name)
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
159
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
160 if self.test_file:
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
161 LOG.info(f"Loading test data from {self.test_file}")
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
162 df_test = pd.read_csv(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
163 self.test_file, sep=None, engine="python"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
164 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
165 df_test.columns = df_test.columns.str.replace(".", "_")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
166 self.test_data = df_test
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
167
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
168 def _select_plot_features(self, all_features):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
169 limit = getattr(self, "plot_feature_limit", 30)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
170 if not isinstance(limit, int) or limit <= 0:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
171 LOG.info(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
172 "Feature plotting limit disabled (plot_feature_limit=%s).", limit
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
173 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
174 return all_features
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
175 if len(all_features) <= limit:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
176 LOG.info(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
177 "Feature plotting limit not needed (%s features <= limit %s).",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
178 len(all_features),
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
179 limit,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
180 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
181 return all_features
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
182 df = self.data[all_features].copy()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
183 numeric_cols = df.select_dtypes(include=["number"]).columns
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
184 ranked = []
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
185 if len(numeric_cols) > 0:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
186 variances = (
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
187 df[numeric_cols]
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
188 .var()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
189 .fillna(0)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
190 .abs()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
191 .sort_values(ascending=False)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
192 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
193 ranked = variances.index.tolist()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
194 selected = []
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
195 for col in ranked:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
196 if len(selected) >= limit:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
197 break
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
198 selected.append(col)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
199 if len(selected) < limit:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
200 for col in all_features:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
201 if col in selected:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
202 continue
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
203 selected.append(col)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
204 if len(selected) >= limit:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
205 break
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
206 LOG.info(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
207 "Limiting feature-level plots to %s of %s available features (limit=%s).",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
208 len(selected),
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
209 len(all_features),
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
210 limit,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
211 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
212 return selected
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
213
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
214 def setup_pycaret(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
215 LOG.info("Initializing PyCaret")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
216 self.setup_params = {
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
217 "target": self.target,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
218 "session_id": self.random_seed,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
219 "html": True,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
220 "log_experiment": False,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
221 "system_log": False,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
222 "index": False,
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
223 }
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
224 if self.test_data is not None:
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
225 self.setup_params["test_data"] = self.test_data
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
226 for attr in [
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
227 "train_size",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
228 "normalize",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
229 "feature_selection",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
230 "remove_outliers",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
231 "remove_multicollinearity",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
232 "polynomial_features",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
233 "feature_interaction",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
234 "feature_ratio",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
235 "fix_imbalance",
10
49f73a3c12f3 planemo upload for repository https://github.com/goeckslab/gleam commit 1ffd143e57fa952ee9dd84fc141771520aea0791
goeckslab
parents: 9
diff changeset
236 "n_jobs",
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
237 ]:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
238 val = getattr(self, attr, None)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
239 if val is not None:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
240 self.setup_params[attr] = val
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
241 if getattr(self, "cross_validation_folds", None) is not None:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
242 self.setup_params["fold"] = self.cross_validation_folds
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
243 LOG.info(self.setup_params)
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
244
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
245 if self.task_type == "classification":
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
246 from pycaret.classification import ClassificationExperiment
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
247
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
248 self.exp = ClassificationExperiment()
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
249 elif self.task_type == "regression":
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
250 from pycaret.regression import RegressionExperiment
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
251
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
252 self.exp = RegressionExperiment()
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
253 else:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
254 raise ValueError(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
255 "task_type must be 'classification' or 'regression'"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
256 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
257
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
258 self.exp.setup(self.data, **self.setup_params)
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
259 self._capture_imputed_training_data()
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
260 self.setup_params.update(self.user_kwargs)
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
261
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
262 def _capture_imputed_training_data(self):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
263 """
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
264 Cache the dataset as transformed/imputed by PyCaret so downstream
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
265 components (e.g., feature importance) can operate on the exact data
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
266 used for training.
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
267 """
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
268 if self.exp is None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
269 return
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
270 try:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
271 X_processed = self.exp.get_config("X_transformed").copy()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
272 y_processed = self.exp.get_config("y")
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
273 if isinstance(y_processed, pd.Series):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
274 y_series = y_processed.reset_index(drop=True)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
275 else:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
276 y_series = pd.Series(y_processed)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
277 y_series.name = self.target
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
278 X_processed = X_processed.reset_index(drop=True)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
279 self.imputed_training_data = pd.concat(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
280 [X_processed, y_series], axis=1
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
281 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
282 LOG.info(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
283 "Captured imputed training dataset from PyCaret "
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
284 "(%s rows, %s features).",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
285 self.imputed_training_data.shape[0],
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
286 self.imputed_training_data.shape[1] - 1,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
287 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
288 except Exception as exc:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
289 LOG.warning(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
290 "Unable to capture processed training data from PyCaret: %s",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
291 exc,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
292 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
293 self.imputed_training_data = None
9
e7dd78077b72 planemo upload for repository https://github.com/goeckslab/gleam commit 84d5cd0b1fa5c1ff0ad892bc39c95dad1ceb4920
goeckslab
parents: 6
diff changeset
294
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
295 def train_model(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
296 LOG.info("Training and selecting the best model")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
297 if self.task_type == "classification":
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
298 self.exp.add_metric(
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
299 id="PR-AUC-Weighted",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
300 name="PR-AUC-Weighted",
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
301 target="pred_proba",
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
302 score_func=average_precision_score,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
303 average="weighted",
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
304 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
305 # Build arguments for compare_models()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
306 compare_kwargs = {}
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
307 if getattr(self, "models", None):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
308 compare_kwargs["include"] = self.models
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
309
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
310 # Respect explicit cross-validation flag
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
311 if getattr(self, "cross_validation", None) is not None:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
312 compare_kwargs["cross_validation"] = self.cross_validation
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
313
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
314 # Respect explicit fold count
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
315 if getattr(self, "cross_validation_folds", None) is not None:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
316 compare_kwargs["fold"] = self.cross_validation_folds
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
317
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
318 best_metric = getattr(self, "best_model_metric", None)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
319 if best_metric:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
320 compare_kwargs["sort"] = best_metric
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
321 self._best_model_metric_used = best_metric
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
322 LOG.info(f"Ranking models using metric: {best_metric}")
9
e7dd78077b72 planemo upload for repository https://github.com/goeckslab/gleam commit 84d5cd0b1fa5c1ff0ad892bc39c95dad1ceb4920
goeckslab
parents: 6
diff changeset
323
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
324 LOG.info(f"compare_models kwargs: {compare_kwargs}")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
325 self.best_model = self.exp.compare_models(**compare_kwargs)
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
326 if self._best_model_metric_used is None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
327 self._best_model_metric_used = getattr(self.exp, "_fold_metric", None)
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
328 self.results = self.exp.pull()
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
329 if getattr(self, "tune_model", False):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
330 LOG.info("Tuning hyperparameters of the best model")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
331 self.best_model = self.exp.tune_model(self.best_model)
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
332 self.tuning_results = self.exp.pull()
3
f6a65e05d6ec planemo upload for repository https://github.com/goeckslab/gleam commit b430f8b466655878c3bf63b053655fdbf039ddb0
goeckslab
parents: 2
diff changeset
333
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
334 if self.task_type == "classification":
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
335 self.results.rename(columns={"AUC": "ROC-AUC"}, inplace=True)
5
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
336
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
337 prob_thresh = getattr(self, "probability_threshold", None)
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
338 if self.task_type == "classification" and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
339 prob_thresh is not None
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
340 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
341 _ = self.exp.predict_model(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
342 self.best_model, probability_threshold=prob_thresh
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
343 )
5
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
344 else:
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
345 _ = self.exp.predict_model(self.best_model)
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
346
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
347 self.test_result_df = self.exp.pull()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
348 if self.task_type == "classification":
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
349 self.test_result_df.rename(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
350 columns={"AUC": "ROC-AUC"}, inplace=True
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
351 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
352
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
353 def save_model(self):
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
354 hdf5_path = Path(self.output_dir) / "pycaret_model.h5"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
355 with h5py.File(hdf5_path, "w") as f:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
356 with tempfile.NamedTemporaryFile(delete=False) as tmp:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
357 joblib.dump(self.best_model, tmp.name)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
358 tmp.seek(0)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
359 model_bytes = tmp.read()
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
360 f.create_dataset("model", data=np.void(model_bytes))
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
361
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
362 def generate_plots(self):
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
363 LOG.info("Generating PyCaret diagnostic pltos")
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
364
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
365 # choose the right plots based on task type
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
366 if self.task_type == "classification":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
367 plot_names = [
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
368 "learning",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
369 "vc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
370 "calibration",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
371 "dimension",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
372 "manifold",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
373 "rfe",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
374 "threshold",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
375 "percentage_above_below",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
376 "class_report",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
377 "pr_auc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
378 "roc_auc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
379 ]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
380 else:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
381 plot_names = ["residuals", "vc", "parameter", "error",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
382 "learning"]
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
383 for name in plot_names:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
384 try:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
385 ax = self.exp.plot_model(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
386 self.best_model, plot=name, save=False
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
387 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
388 out_path = Path(self.output_dir) / f"plot_{name}.png"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
389 fig = ax.get_figure()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
390 fig.savefig(out_path, bbox_inches="tight")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
391 self.plots[name] = str(out_path)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
392 except Exception as e:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
393 LOG.warning(f"Could not generate {name} plot: {e}")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
394
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
395 def encode_image_to_base64(self, img_path: str) -> str:
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
396 with open(img_path, "rb") as img_file:
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
397 return base64.b64encode(img_file.read()).decode("utf-8")
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
398
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
399 def _build_dataset_overview(self):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
400 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
401 Build an HTML table showing label counts with labels as rows and splits
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
402 (Train / Validation / Test) as columns. Each cell shows count and
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
403 percentage of that split. Returns empty string for regression or when
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
404 no label data is available.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
405 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
406 if self.task_type != "classification":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
407 return ""
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
408
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
409 def _safe_series(obj):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
410 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
411 return pd.Series(obj).reset_index(drop=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
412 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
413 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
414
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
415 def _get_from_config(keys):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
416 if self.exp is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
417 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
418 for key in keys:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
419 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
420 val = self.exp.get_config(key)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
421 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
422 val = getattr(self.exp, key, None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
423 if val is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
424 return val
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
425 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
426
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
427 # Prefer PyCaret-configured splits; fall back to raw inputs.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
428 X_train = _get_from_config(["X_train_transformed", "X_train"])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
429 y_train = _get_from_config(["y_train_transformed", "y_train"])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
430 y_test_cfg = _get_from_config(["y_test_transformed", "y_test"])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
431
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
432 if y_train is None and self.data is not None and self.target in self.data.columns:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
433 y_train = self.data[self.target]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
434
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
435 y_train_series = _safe_series(y_train)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
436
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
437 # Build a cross-validation generator to derive a validation subset size.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
438 cv_gen = self._get_cv_generator(y_train_series)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
439 y_train_fold = y_train_series
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
440 y_val_fold = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
441 if cv_gen is not None and y_train_series is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
442 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
443 # Use the first fold to approximate Train/Validation split sizes.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
444 splitter = cv_gen.split(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
445 pd.DataFrame(X_train).reset_index(drop=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
446 if X_train is not None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
447 else y_train_series,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
448 y_train_series,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
449 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
450 train_idx, val_idx = next(iter(splitter))
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
451 y_train_fold = y_train_series.iloc[train_idx].reset_index(drop=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
452 y_val_fold = y_train_series.iloc[val_idx].reset_index(drop=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
453 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
454 LOG.warning("Could not derive validation split for dataset overview: %s", exc)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
455
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
456 # Test labels: prefer PyCaret transformed holdout (single file) or external test.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
457 if self.test_data is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
458 if y_test_cfg is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
459 y_test = y_test_cfg
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
460 elif self.target in self.test_data.columns:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
461 y_test = self.test_data[self.target]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
462 else:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
463 y_test = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
464 else:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
465 y_test = y_test_cfg
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
466
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
467 split_map = {
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
468 "Train": _safe_series(y_train_fold),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
469 "Validation": _safe_series(y_val_fold),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
470 "Test": _safe_series(y_test),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
471 }
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
472 available = {k: v for k, v in split_map.items() if v is not None and not v.empty}
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
473 if not available:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
474 return ""
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
475
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
476 # Collect all labels across available splits (including NaN)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
477 label_pool = pd.concat(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
478 available.values(), ignore_index=True
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
479 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
480 labels = pd.unique(label_pool)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
481
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
482 def _count_for_label(series, label):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
483 if series is None or series.empty:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
484 return None, None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
485 total = len(series)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
486 if pd.isna(label):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
487 cnt = series.isna().sum()
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
488 else:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
489 cnt = (series == label).sum()
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
490 return int(cnt), total
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
491
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
492 rows = []
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
493 for label in labels:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
494 row = ["NaN" if pd.isna(label) else str(label)]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
495 for split_name in ["Train", "Validation", "Test"]:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
496 cnt, total = _count_for_label(split_map.get(split_name), label)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
497 if cnt is None or total is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
498 cell = "—"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
499 else:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
500 pct = (cnt / total * 100) if total else 0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
501 cell = f"{cnt} ({pct:.1f}%)"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
502 row.append(cell)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
503 rows.append(row)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
504
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
505 df = pd.DataFrame(rows, columns=["Label", "Train", "Validation", "Test"])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
506 df.sort_values("Label", inplace=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
507
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
508 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
509 "<h2>Dataset Overview</h2>"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
510 + '<div class="table-wrapper">'
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
511 + df.to_html(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
512 index=False,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
513 classes=["table", "sortable", "table-dataset-overview"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
514 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
515 + "</div>"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
516 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
517
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
518 def _predict_with_thresholds(self, X, y_true):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
519 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
520 Generate predictions/probabilities for a split, respecting an optional
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
521 probability threshold for binary tasks. Returns a dict with y_true,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
522 y_pred, y_scores (positive-class probs when available), pos_label,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
523 and neg_label.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
524 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
525 if X is None or y_true is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
526 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
527
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
528 y_true_series = pd.Series(y_true).reset_index(drop=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
529 classes = list(getattr(self.best_model, "classes_", []))
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
530 if not classes:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
531 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
532 classes = pd.unique(y_true_series).tolist()
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
533 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
534 classes = []
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
535 if len(classes) > 1:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
536 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
537 pos_idx = classes.index(1)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
538 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
539 pos_idx = 1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
540 else:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
541 pos_idx = 0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
542 pos_idx = min(pos_idx, len(classes) - 1) if classes else 0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
543 pos_label = (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
544 classes[pos_idx]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
545 if len(classes) > pos_idx and pos_idx >= 0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
546 else (classes[-1] if classes else 1)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
547 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
548 neg_label = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
549 if len(classes) >= 2:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
550 neg_candidates = [c for c in classes if c != pos_label]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
551 if neg_candidates:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
552 neg_label = neg_candidates[0]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
553
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
554 prob_thresh = getattr(self, "probability_threshold", None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
555 y_scores = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
556 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
557 proba = self.best_model.predict_proba(X)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
558 y_scores = np.asarray(proba) if proba is not None else None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
559 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
560 y_scores = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
561
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
562 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
563 if (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
564 prob_thresh is not None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
565 and not getattr(self.exp, "is_multiclass", False)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
566 and y_scores is not None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
567 and y_scores.ndim == 2
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
568 and y_scores.shape[1] > 1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
569 ):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
570 pos_idx = min(pos_idx, y_scores.shape[1] - 1)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
571 neg_idx = 1 - pos_idx if y_scores.shape[1] > 1 else 0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
572 if neg_label is None and len(classes) > neg_idx:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
573 neg_label = classes[neg_idx]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
574 y_pred = np.where(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
575 y_scores[:, pos_idx] >= prob_thresh,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
576 pos_label,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
577 neg_label if neg_label is not None else 0,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
578 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
579 y_scores = y_scores[:, pos_idx]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
580 else:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
581 y_pred = self.best_model.predict(X)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
582 if (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
583 not getattr(self.exp, "is_multiclass", False)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
584 and y_scores is not None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
585 and y_scores.ndim == 2
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
586 and y_scores.shape[1] > 1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
587 ):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
588 pos_idx = min(pos_idx, y_scores.shape[1] - 1)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
589 y_scores = y_scores[:, pos_idx]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
590 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
591 LOG.warning(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
592 "Falling back to raw predict while computing performance summary: %s",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
593 exc,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
594 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
595 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
596 y_pred = self.best_model.predict(X)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
597 except Exception as exc_inner:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
598 LOG.warning(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
599 "Unable to score split after fallback prediction: %s",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
600 exc_inner,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
601 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
602 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
603 y_scores = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
604
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
605 y_pred_series = pd.Series(y_pred).reset_index(drop=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
606 if y_scores is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
607 y_scores = np.asarray(y_scores)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
608 if y_scores.ndim > 1 and y_scores.shape[1] == 1:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
609 y_scores = y_scores.ravel()
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
610 if getattr(self.exp, "is_multiclass", False) and y_scores.ndim > 1:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
611 # Avoid passing multiclass score matrices to ROC/PR utilities
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
612 y_scores = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
613
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
614 return {
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
615 "y_true": y_true_series,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
616 "y_pred": y_pred_series,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
617 "y_scores": y_scores,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
618 "pos_label": pos_label,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
619 "neg_label": neg_label,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
620 }
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
621
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
622 def _get_cv_generator(self, y_series):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
623 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
624 Build a cross-validation splitter that mirrors the experiment's
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
625 configuration. Returns None when CV is disabled or not applicable.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
626 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
627 if self.task_type != "classification":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
628 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
629
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
630 if getattr(self, "cross_validation", None) is False:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
631 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
632
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
633 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
634 cfg_gen = self.exp.get_config("fold_generator")
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
635 if cfg_gen is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
636 return cfg_gen
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
637 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
638 cfg_gen = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
639
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
640 folds = (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
641 getattr(self, "cross_validation_folds", None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
642 or self.setup_params.get("fold")
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
643 or getattr(self.exp, "fold", None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
644 or 10
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
645 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
646 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
647 folds = int(folds)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
648 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
649 folds = 10
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
650
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
651 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
652 y_series = pd.Series(y_series).reset_index(drop=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
653 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
654 y_series = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
655 if y_series is None or y_series.empty:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
656 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
657
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
658 if folds < 2:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
659 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
660 if len(y_series) < folds:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
661 folds = len(y_series)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
662 if folds < 2:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
663 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
664
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
665 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
666 from sklearn.model_selection import KFold, StratifiedKFold
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
667
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
668 if self.task_type == "classification":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
669 return StratifiedKFold(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
670 n_splits=folds,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
671 shuffle=True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
672 random_state=self.random_seed,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
673 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
674 return KFold(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
675 n_splits=folds,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
676 shuffle=True,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
677 random_state=self.random_seed,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
678 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
679 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
680 LOG.warning("Could not build CV generator: %s", exc)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
681 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
682
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
683 def _get_cross_validated_predictions(self, X, y):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
684 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
685 Generate cross-validated predictions for the validation split so we
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
686 can report validation metrics for the selected best model.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
687 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
688 if self.task_type != "classification":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
689 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
690 if getattr(self, "cross_validation", None) is False:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
691 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
692 if X is None or y is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
693 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
694
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
695 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
696 from sklearn.model_selection import cross_val_predict
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
697 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
698 LOG.warning("cross_val_predict unavailable: %s", exc)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
699 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
700
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
701 y_series = pd.Series(y).reset_index(drop=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
702 if y_series.empty:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
703 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
704
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
705 cv_gen = self._get_cv_generator(y_series)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
706 if cv_gen is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
707 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
708
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
709 X_df = pd.DataFrame(X).reset_index(drop=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
710 if len(X_df) != len(y_series):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
711 X_df = X_df.iloc[: len(y_series)].reset_index(drop=True)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
712
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
713 classes = list(getattr(self.best_model, "classes_", []))
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
714 if len(classes) > 1:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
715 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
716 pos_idx = classes.index(1)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
717 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
718 pos_idx = 1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
719 else:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
720 pos_idx = 0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
721 pos_idx = min(pos_idx, len(classes) - 1) if classes else 0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
722 pos_label = (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
723 classes[pos_idx] if len(classes) > pos_idx else 1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
724 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
725 neg_label = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
726 if len(classes) >= 2:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
727 neg_candidates = [c for c in classes if c != pos_label]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
728 if neg_candidates:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
729 neg_label = neg_candidates[0]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
730
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
731 prob_thresh = getattr(self, "probability_threshold", None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
732 n_jobs = getattr(self, "n_jobs", None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
733
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
734 y_scores = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
735 if not getattr(self.exp, "is_multiclass", False):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
736 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
737 proba = cross_val_predict(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
738 self.best_model,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
739 X_df,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
740 y_series,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
741 cv=cv_gen,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
742 method="predict_proba",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
743 n_jobs=n_jobs,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
744 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
745 y_scores = np.asarray(proba)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
746 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
747 LOG.debug("Could not compute CV probabilities: %s", exc)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
748
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
749 y_pred = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
750 if (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
751 prob_thresh is not None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
752 and not getattr(self.exp, "is_multiclass", False)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
753 and y_scores is not None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
754 and y_scores.ndim == 2
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
755 and y_scores.shape[1] > 1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
756 ):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
757 pos_idx = min(pos_idx, y_scores.shape[1] - 1)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
758 neg_idx = 1 - pos_idx if y_scores.shape[1] > 1 else 0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
759 if neg_label is None and len(classes) > neg_idx:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
760 neg_label = classes[neg_idx]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
761 y_pred = np.where(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
762 y_scores[:, pos_idx] >= prob_thresh,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
763 pos_label,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
764 neg_label if neg_label is not None else 0,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
765 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
766 y_scores = y_scores[:, pos_idx]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
767 else:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
768 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
769 y_pred = cross_val_predict(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
770 self.best_model,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
771 X_df,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
772 y_series,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
773 cv=cv_gen,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
774 method="predict",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
775 n_jobs=n_jobs,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
776 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
777 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
778 LOG.warning(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
779 "Could not compute cross-validated predictions: %s",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
780 exc,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
781 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
782 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
783 if (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
784 not getattr(self.exp, "is_multiclass", False)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
785 and y_scores is not None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
786 and y_scores.ndim == 2
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
787 and y_scores.shape[1] > 1
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
788 ):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
789 pos_idx = min(pos_idx, y_scores.shape[1] - 1)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
790 y_scores = y_scores[:, pos_idx]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
791
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
792 if y_scores is not None and getattr(self.exp, "is_multiclass", False):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
793 y_scores = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
794
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
795 return {
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
796 "y_true": y_series,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
797 "y_pred": pd.Series(y_pred).reset_index(drop=True),
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
798 "y_scores": y_scores,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
799 "pos_label": pos_label,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
800 "neg_label": neg_label,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
801 }
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
802
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
803 def _get_split_predictions_for_report(self):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
804 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
805 Collect predictions/probabilities for Train/Validation/Test splits so the
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
806 performance table can show consistent metrics across splits.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
807 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
808 if self.task_type != "classification":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
809 return {}
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
810
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
811 def _get_from_config(keys):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
812 for key in keys:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
813 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
814 val = self.exp.get_config(key)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
815 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
816 val = getattr(self.exp, key, None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
817 if val is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
818 return val
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
819 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
820
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
821 X_train = _get_from_config(["X_train_transformed", "X_train"])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
822 y_train = _get_from_config(["y_train_transformed", "y_train"])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
823 X_holdout = _get_from_config(["X_test_transformed", "X_test"])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
824 y_holdout = _get_from_config(["y_test_transformed", "y_test"])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
825
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
826 predictions = {}
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
827
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
828 # Train metrics (best model on training data)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
829 if X_train is not None and y_train is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
830 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
831 train_preds = self._predict_with_thresholds(X_train, y_train)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
832 if train_preds is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
833 predictions["Train"] = train_preds
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
834 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
835 LOG.warning(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
836 "Could not score Train split for performance summary: %s",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
837 exc,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
838 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
839
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
840 # Validation metrics via cross-validation on training data
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
841 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
842 val_preds = self._get_cross_validated_predictions(X_train, y_train)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
843 if val_preds is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
844 predictions["Validation"] = val_preds
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
845 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
846 LOG.warning(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
847 "Could not score Validation split for performance summary: %s",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
848 exc,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
849 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
850
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
851 # Test metrics (holdout from single file, or provided test file)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
852 X_test = X_holdout
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
853 y_test = y_holdout
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
854 if (X_test is None or y_test is None) and self.test_data is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
855 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
856 X_test = self.test_data.drop(columns=[self.target])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
857 y_test = self.test_data[self.target]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
858 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
859 LOG.warning(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
860 "Could not prepare external test data for performance summary: %s",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
861 exc,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
862 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
863
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
864 if X_test is not None and y_test is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
865 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
866 test_preds = self._predict_with_thresholds(X_test, y_test)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
867 if test_preds is not None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
868 predictions["Test"] = test_preds
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
869 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
870 LOG.warning(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
871 "Could not score Test split for performance summary: %s",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
872 exc,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
873 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
874 return predictions
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
875
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
876 def _compute_metric_value(self, metric_name, preds, split_name):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
877 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
878 Compute a single metric for a given split prediction bundle.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
879 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
880 if preds is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
881 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
882
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
883 y_true = preds["y_true"]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
884 y_pred = preds["y_pred"]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
885 y_scores = preds.get("y_scores")
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
886 pos_label = preds.get("pos_label")
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
887 neg_label = preds.get("neg_label")
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
888 is_multiclass = getattr(self.exp, "is_multiclass", False)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
889
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
890 def _format_binary_labels(series):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
891 if pos_label is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
892 return series
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
893 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
894 return (series == pos_label).astype(int)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
895 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
896 return series
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
897
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
898 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
899 if metric_name == "Accuracy":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
900 return accuracy_score(y_true, y_pred)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
901 if metric_name == "ROC-AUC":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
902 if y_scores is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
903 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
904 y_true_bin = _format_binary_labels(y_true)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
905 if len(pd.unique(y_true_bin)) < 2:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
906 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
907 return roc_auc_score(y_true_bin, y_scores)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
908 if metric_name == "Precision":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
909 if is_multiclass:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
910 return precision_score(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
911 y_true, y_pred, average="weighted", zero_division=0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
912 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
913 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
914 return precision_score(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
915 y_true, y_pred, pos_label=pos_label, zero_division=0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
916 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
917 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
918 return precision_score(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
919 y_true, y_pred, average="weighted", zero_division=0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
920 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
921 if metric_name == "Recall":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
922 if is_multiclass:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
923 return recall_score(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
924 y_true, y_pred, average="weighted", zero_division=0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
925 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
926 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
927 return recall_score(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
928 y_true, y_pred, pos_label=pos_label, zero_division=0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
929 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
930 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
931 return recall_score(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
932 y_true, y_pred, average="weighted", zero_division=0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
933 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
934 if metric_name == "F1-Score":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
935 if is_multiclass:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
936 return f1_score(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
937 y_true, y_pred, average="weighted", zero_division=0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
938 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
939 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
940 return f1_score(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
941 y_true, y_pred, pos_label=pos_label, zero_division=0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
942 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
943 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
944 return f1_score(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
945 y_true, y_pred, average="weighted", zero_division=0
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
946 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
947 if metric_name == "PR-AUC":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
948 if y_scores is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
949 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
950 y_true_bin = _format_binary_labels(y_true)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
951 if len(pd.unique(y_true_bin)) < 2:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
952 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
953 return average_precision_score(y_true_bin, y_scores)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
954 if metric_name == "Specificity":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
955 labels = pd.unique(pd.concat([y_true, y_pred], ignore_index=True))
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
956 if len(labels) != 2:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
957 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
958 if pos_label is None or pos_label not in labels:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
959 pos_label = labels[1]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
960 neg_candidates = [lbl for lbl in labels if lbl != pos_label]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
961 neg_label_final = (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
962 neg_label if neg_label in labels else (neg_candidates[0] if neg_candidates else None)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
963 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
964 if neg_label_final is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
965 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
966 cm = confusion_matrix(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
967 y_true, y_pred, labels=[neg_label_final, pos_label]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
968 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
969 if cm.shape != (2, 2):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
970 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
971 tn, fp, fn, tp = cm.ravel()
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
972 denom = tn + fp
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
973 return (tn / denom) if denom else None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
974 if metric_name == "MCC":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
975 return matthews_corrcoef(y_true, y_pred)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
976 except Exception as exc:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
977 LOG.warning(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
978 "Could not compute %s for %s split: %s",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
979 metric_name,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
980 split_name,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
981 exc,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
982 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
983 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
984 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
985
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
986 def _build_performance_summary_table(self):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
987 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
988 Build a Train/Validation/Test metrics table for classification tasks.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
989 Returns empty string when metrics are unavailable or not applicable.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
990 """
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
991 if self.task_type != "classification":
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
992 return ""
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
993
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
994 split_predictions = self._get_split_predictions_for_report()
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
995 validation_best_row = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
996 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
997 if isinstance(self.results, pd.DataFrame) and not self.results.empty:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
998 validation_best_row = self.results.iloc[0]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
999 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1000 validation_best_row = None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1001
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1002 if not split_predictions and validation_best_row is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1003 return ""
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1004
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1005 metric_names = [
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1006 "Accuracy",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1007 "ROC-AUC",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1008 "Precision",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1009 "Recall",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1010 "F1-Score",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1011 "PR-AUC",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1012 "Specificity",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1013 "MCC",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1014 ]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1015
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1016 validation_column_map = {
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1017 "Accuracy": ["Accuracy"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1018 "ROC-AUC": ["ROC-AUC", "AUC"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1019 "Precision": ["Precision", "Prec.", "Prec"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1020 "Recall": ["Recall"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1021 "F1-Score": ["F1-Score", "F1"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1022 "PR-AUC": ["PR-AUC", "PR-AUC-Weighted", "PRC"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1023 "Specificity": ["Specificity"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1024 "MCC": ["MCC"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1025 }
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1026
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1027 def _fmt(value):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1028 if value is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1029 return "—"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1030 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1031 if isinstance(value, (float, np.floating)) and (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1032 np.isnan(value) or np.isinf(value)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1033 ):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1034 return "—"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1035 return f"{value:.3f}"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1036 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1037 return str(value)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1038
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1039 def _validation_metric(metric_name):
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1040 if validation_best_row is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1041 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1042 cols = validation_column_map.get(metric_name, [])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1043 for col in cols:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1044 if col in validation_best_row:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1045 try:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1046 return validation_best_row[col]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1047 except Exception:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1048 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1049 return None
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1050
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1051 rows = []
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1052 for metric in metric_names:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1053 row = [metric]
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1054 # Train
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1055 train_val = self._compute_metric_value(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1056 metric, split_predictions.get("Train"), "Train"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1057 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1058 row.append(_fmt(train_val))
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1059
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1060 # Validation from Train & Validation Summary first row; fallback to computed CV.
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1061 val_val = _validation_metric(metric)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1062 if val_val is None:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1063 val_val = self._compute_metric_value(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1064 metric, split_predictions.get("Validation"), "Validation"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1065 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1066 row.append(_fmt(val_val))
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1067
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1068 # Test
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1069 test_val = self._compute_metric_value(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1070 metric, split_predictions.get("Test"), "Test"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1071 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1072 row.append(_fmt(test_val))
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1073 rows.append(row)
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1074
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1075 df = pd.DataFrame(rows, columns=["Metric", "Train", "Validation", "Test"])
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1076 return (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1077 "<h2>Model Performance Summary</h2>"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1078 + '<div class="table-wrapper">'
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1079 + df.to_html(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1080 index=False,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1081 classes=["table", "sortable", "table-perf-summary"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1082 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1083 + "</div>"
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1084 )
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1085
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1086 def _resolve_plot_callable(self, key, fig_or_fn, section):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1087 """
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1088 Safely execute stored plot callables so a single failure does not
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1089 abort the entire HTML report generation.
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1090 """
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1091 if fig_or_fn is None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1092 return None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1093 try:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1094 return fig_or_fn() if callable(fig_or_fn) else fig_or_fn
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1095 except Exception as exc:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1096 extra = ""
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1097 if isinstance(exc, ValueError) and "Input contains NaN" in str(exc):
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1098 extra = (
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1099 " (model returned NaN probabilities; "
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1100 "consider checking data preprocessing)"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1101 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1102 LOG.warning(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1103 "Skipping %s plot '%s' due to error: %s%s",
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1104 section,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1105 key,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1106 exc,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1107 extra,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1108 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1109 return None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1110
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1111 def save_html_report(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1112 LOG.info("Saving HTML report")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1113
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1114 # 1) Determine best model name
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1115 try:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1116 best_model_name = str(self.results.iloc[0]["Model"])
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1117 except Exception:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1118 best_model_name = type(self.best_model).__name__
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1119 LOG.info(f"Best model determined as: {best_model_name}")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1120
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1121 # 2) Compute training sample count
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1122 try:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1123 n_train = self.exp.X_train.shape[0]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1124 except Exception:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1125 n_train = getattr(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1126 self.exp, "X_train_transformed", pd.DataFrame()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1127 ).shape[0]
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1128 total_rows = self.data.shape[0]
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1129
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1130 # 3) Build setup parameters table
5
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
1131 all_params = self.setup_params.copy()
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1132 if self.task_type == "classification" and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1133 hasattr(self, "probability_threshold")
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1134 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1135 all_params["probability_threshold"] = (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1136 self.probability_threshold
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1137 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1138 display_keys = [
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1139 "Target",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1140 "Session ID",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1141 "Train Size",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1142 "Normalize",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1143 "Feature Selection",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1144 "Cross Validation",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1145 "Cross Validation Folds",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1146 "Remove Outliers",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1147 "Remove Multicollinearity",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1148 "Polynomial Features",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1149 "Fix Imbalance",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1150 "Models",
5
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
1151 "Probability Threshold",
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1152 ]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1153 setup_rows = []
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1154 for key in display_keys:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1155 pk = key.lower().replace(" ", "_")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1156 v = all_params.get(pk)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1157 if key == "Train Size":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1158 frac = (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1159 float(v)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1160 if v is not None
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1161 else (n_train / total_rows if total_rows else 0)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1162 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1163 dv = f"{frac:.2f} ({n_train} rows)"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1164 elif key in {
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1165 "Normalize",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1166 "Feature Selection",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1167 "Cross Validation",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1168 "Remove Outliers",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1169 "Remove Multicollinearity",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1170 "Polynomial Features",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1171 "Fix Imbalance",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1172 }:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1173 dv = bool(v)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1174 elif key == "Cross Validation Folds":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1175 dv = v if v is not None else "None"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1176 elif key == "Models":
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1177 dv = ", ".join(map(str, v)) if isinstance(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1178 v, (list, tuple)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1179 ) else "None"
5
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
1180 elif key == "Probability Threshold":
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1181 dv = f"{v:.2f}" if v is not None else "0.5"
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1182 else:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1183 dv = v if v is not None else "None"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1184 setup_rows.append([key, dv])
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1185 metric_label = self._best_model_metric_used or getattr(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1186 self.exp, "_fold_metric", None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1187 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1188 if metric_label:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1189 setup_rows.append(["Best Model Metric", metric_label])
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1190
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1191 df_setup = pd.DataFrame(setup_rows, columns=["Parameter", "Value"])
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1192 df_setup.to_csv(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1193 Path(self.output_dir) / "setup_params.csv", index=False
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1194 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1195
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1196 # 4) Persist CSVs
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1197 self.results.to_csv(
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1198 Path(self.output_dir) / "comparison_results.csv",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1199 index=False
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1200 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1201 self.test_result_df.to_csv(
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1202 Path(self.output_dir) / "test_results.csv", index=False
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1203 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1204 pd.DataFrame(
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1205 self.best_model.get_params().items(),
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1206 columns=["Parameter", "Value"]
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1207 ).to_csv(Path(self.output_dir) / "best_model.csv", index=False)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1208
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1209 if self.tuning_results is not None:
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1210 self.tuning_results.to_csv(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1211 Path(self.output_dir) / "tuning_results.csv",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1212 index=False
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1213 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1214
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1215 # 5) Header
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1216 header = f"<h2>Best Model: {best_model_name}</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1217
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1218 # — Validation Summary & Configuration —
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1219 val_df = self.results.copy()
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1220 dataset_overview_html = self._build_dataset_overview()
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1221 performance_summary_html = self._build_performance_summary_table()
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1222 # mapping raw plot keys to user-friendly titles
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1223 plot_title_map = {
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1224 "learning": "Learning Curve",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1225 "vc": "Validation Curve",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1226 "calibration": "Calibration Curve",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1227 "dimension": "Dimensionality Reduction",
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1228 "manifold": "t-SNE",
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1229 "rfe": "Recursive Feature Elimination",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1230 "threshold": "Threshold Plot",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1231 "percentage_above_below": "Percentage Above vs. Below Cutoff",
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1232 "class_report": "Per-Class Metrics",
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1233 "pr_auc": "Precision-Recall AUC",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1234 "roc_auc": "Receiver Operating Characteristic AUC",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1235 "residuals": "Residuals Distribution",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1236 "error": "Prediction Error Distribution",
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1237 }
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1238 val_df.drop(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1239 columns=["TT (Ec)", "TT (Sec)"], errors="ignore", inplace=True
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1240 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1241 summary_html = (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1242 header
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1243 + "<h2>Train & Validation Summary</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1244 + '<div class="table-wrapper">'
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1245 + val_df.to_html(index=False, classes="table sortable")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1246 + "</div>"
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1247 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1248
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1249 if self.tuning_results is not None:
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1250 tuning_df = self.tuning_results.copy()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1251 tuning_df.drop(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1252 columns=["TT (Sec)"], errors="ignore", inplace=True
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1253 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1254 summary_html += (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1255 f"<h2>{best_model_name}: Tuning Summary</h2>"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1256 + '<div class="table-wrapper">'
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1257 + tuning_df.to_html(index=False, classes="table sortable")
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1258 + "</div>"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1259 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1260
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1261 config_html = (
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1262 header
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1263 + dataset_overview_html
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1264 + performance_summary_html
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1265 + "<h2>Setup Parameters</h2>"
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1266 + '<div class="table-wrapper">'
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1267 + df_setup.to_html(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1268 index=False,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1269 classes=["table", "sortable", "table-setup-params"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1270 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1271 + "</div>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1272 # — Hyperparameters
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1273 + "<h2>Best Model Hyperparameters</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1274 + '<div class="table-wrapper">'
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1275 + pd.DataFrame(
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1276 self.best_model.get_params().items(),
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1277 columns=["Parameter", "Value"]
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1278 ).to_html(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1279 index=False,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1280 classes=["table", "sortable", "table-hyperparams"],
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1281 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1282 + "</div>"
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1283 )
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1284
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1285 # choose summary plots based on task type
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1286 if self.task_type == "classification":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1287 summary_plots = [
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1288 "threshold",
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1289 "learning",
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1290 "calibration",
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1291 "rfe",
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1292 "vc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1293 "dimension",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1294 "manifold",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1295 "percentage_above_below",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1296 ]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1297 else:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1298 summary_plots = ["learning", "vc", "parameter", "residuals"]
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1299
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1300 for name in summary_plots:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1301 if name in self.plots:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1302 summary_html += "<hr>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1303 b64 = encode_image_to_base64(self.plots[name])
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1304 title = plot_title_map.get(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1305 name, name.replace("_", " ").title()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1306 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1307 summary_html += (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1308 '<div class="plot">'
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1309 f"<h2>{title}</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1310 f'<img src="data:image/png;base64,{b64}" '
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1311 'style="max-width:90%;max-height:600px;'
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1312 'border:1px solid #ddd;"/>'
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1313 "</div>"
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1314 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1315
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1316 # — Test Summary —
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1317 test_html = (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1318 header
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1319 + '<div class="table-wrapper">'
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1320 + self.test_result_df.to_html(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1321 index=False, classes="table sortable"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1322 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1323 + "</div>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1324 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1325 if self.task_type == "regression":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1326 try:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1327 y_true = (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1328 pd.Series(self.exp.y_test_transformed)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1329 .reset_index(drop=True)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1330 .rename("True")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1331 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1332 y_pred = pd.Series(
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1333 self.best_model.predict(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1334 self.exp.X_test_transformed
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1335 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1336 ).rename("Predicted")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1337 df_tp = pd.concat([y_true, y_pred], axis=1)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1338 test_html += "<h2>True vs Predicted Values</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1339 test_html += (
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1340 '<div class="table-wrapper" '
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1341 'style="max-height:400px; overflow-y:auto;">'
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1342 + df_tp.head(50).to_html(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1343 index=False, classes="table sortable"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1344 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1345 + "</div>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1346 + add_hr_to_html()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1347 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1348 except Exception as e:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1349 LOG.warning(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1350 f"Could not generate True vs Predicted table: {e}"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1351 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1352
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1353 # 5a) Explainer-substituted plots in order
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1354 if self.task_type == "regression":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1355 test_order = ["residuals"]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1356 else:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1357 test_order = [
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1358 "confusion_matrix",
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1359 "class_report",
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1360 "roc_auc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1361 "pr_auc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1362 "lift_curve",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1363 "cumulative_precision",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1364 ]
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1365 rendered_test_plots = set()
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1366 for key in test_order:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1367 fig_or_fn = self.explainer_plots.pop(key, None)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1368 if fig_or_fn is not None:
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1369 fig = self._resolve_plot_callable(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1370 key, fig_or_fn, section="test/explainer"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1371 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1372 if fig is None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1373 continue
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1374 rendered_test_plots.add(key)
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1375 title = plot_title_map.get(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1376 key, key.replace("_", " ").title()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1377 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1378 test_html += (
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1379 f"<h2>{title}</h2>" + add_plot_to_html(fig)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1380 + add_hr_to_html()
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1381 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1382 # 5b) Remaining PyCaret test plots
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1383 for name, path in self.plots.items():
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1384 # classification: include only the small extras, before
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1385 # skipping anything
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1386 if self.task_type == "classification" and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1387 name in {
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1388 "pr_auc",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1389 "class_report",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1390 }
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1391 ):
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1392 if name in rendered_test_plots:
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1393 continue
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1394 title = plot_title_map.get(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1395 name, name.replace("_", " ").title()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1396 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1397 b64 = encode_image_to_base64(path)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1398 test_html += (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1399 f"<h2>{title}</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1400 "<div class='plot'>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1401 f"<img src='data:image/png;base64,{b64}' "
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1402 "style='max-width:90%;max-height:600px;"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1403 "border:1px solid #ddd;'/>"
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1404 "</div>" + add_hr_to_html()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1405 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1406 continue
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1407
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1408 # regression: explicitly include the 'error' plot,
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1409 # before skipping
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1410 if self.task_type == "regression" and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1411 name == "error"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1412 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1413 title = plot_title_map.get(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1414 "error", "Prediction Error Distribution"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1415 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1416 b64 = encode_image_to_base64(path)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1417 test_html += (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1418 f"<h2>{title}</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1419 "<div class='plot'>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1420 f"<img src='data:image/png;base64,{b64}' "
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1421 "style='max-width:90%;max-height:600px;"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1422 "border:1px solid #ddd;'/>"
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1423 "</div>" + add_hr_to_html()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1424 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1425 continue
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1426
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1427 # now skip any plots already rendered via test_order
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1428 if name in test_order:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1429 continue
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1430
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1431 # — Feature Importance —
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1432 feature_html = header
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1433
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1434 # 6a) PyCaret’s default feature importances
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1435 imputed_data = (
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1436 self.imputed_training_data
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1437 if self.imputed_training_data is not None
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1438 else self.data
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1439 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1440 fi_analyzer = FeatureImportanceAnalyzer(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1441 data=imputed_data,
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1442 target_col=self.target_col,
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1443 task_type=self.task_type,
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1444 output_dir=self.output_dir,
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1445 exp=self.exp,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1446 best_model=self.best_model,
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1447 max_plot_features=self.plot_feature_limit,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1448 processed_data=self.imputed_training_data,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1449 max_shap_rows=self._shap_row_cap,
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1450 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1451 fi_html = fi_analyzer.run()
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1452 # Add a small table to show SHAP feature caps near the Best Model header.
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1453 cap_rows = []
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1454 if fi_analyzer.shap_total_features is not None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1455 cap_rows.append(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1456 ("Total transformed features", fi_analyzer.shap_total_features)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1457 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1458 if fi_analyzer.shap_used_features is not None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1459 cap_rows.append(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1460 ("Features used in SHAP", fi_analyzer.shap_used_features)
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1461 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1462 if cap_rows:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1463 cap_table = (
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1464 "<div class='table-wrapper'>"
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1465 "<table class='table sortable table-fi-scope'>"
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1466 "<thead><tr><th>Feature Importance Scope</th><th>Count</th></tr></thead>"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1467 "<tbody>"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1468 + "".join(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1469 f"<tr><td>{label}</td><td>{value}</td></tr>"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1470 for label, value in cap_rows
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1471 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1472 + "</tbody></table></div>"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1473 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1474 feature_html += cap_table
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1475 feature_html += fi_html
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1476
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1477 # 6b) Explainer SHAP importances
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1478 for key in ["shap_mean", "shap_perm"]:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1479 fig_or_fn = self.explainer_plots.pop(key, None)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1480 if fig_or_fn is not None:
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1481 fig = self._resolve_plot_callable(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1482 key, fig_or_fn, section="feature importance"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1483 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1484 if fig is None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1485 continue
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1486 # give SHAP plots explicit titles
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1487 title = (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1488 "Mean Absolute SHAP Value Impact"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1489 if key == "shap_mean"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1490 else "Permutation Feature Importance"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1491 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1492 feature_html += (
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1493 f"<h2>{title}</h2>" + add_plot_to_html(fig)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1494 + add_hr_to_html()
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1495 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1496
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1497 # 6c) PDPs last
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1498 pdp_keys = sorted(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1499 k for k in self.explainer_plots if k.startswith("pdp__")
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1500 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1501 for k in pdp_keys:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1502 fig_or_fn = self.explainer_plots[k]
12
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1503 fig = self._resolve_plot_callable(
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1504 k, fig_or_fn, section="pdp"
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1505 )
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1506 if fig is None:
15707141e7da planemo upload for repository https://github.com/goeckslab/gleam commit 2b826699ef9518d4610f5cfb6468ce719ec8039d
goeckslab
parents: 10
diff changeset
1507 continue
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1508 # extract feature name
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1509 feature = k.split("__", 1)[1]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1510 title = f"Partial Dependence for {feature}"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1511 feature_html += (
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1512 f"<h2>{title}</h2>" + add_plot_to_html(fig)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1513 + add_hr_to_html()
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1514 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1515 # 7) Assemble final HTML (three tabs)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1516 html = get_html_template()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1517 html += "<h1>Tabular Learner Model Report</h1>"
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1518 html += build_tabbed_html(
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1519 summary_html,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1520 test_html,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1521 feature_html,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1522 explainer_html=None,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1523 config_html=config_html,
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1524 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1525 html += get_feature_metrics_help_modal()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1526 html += get_html_closing()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1527
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1528 # 8) Write out
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1529 (Path(self.output_dir) / "comparison_result.html").write_text(
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1530 html, encoding="utf-8"
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1531 )
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1532 LOG.info(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1533 f"HTML report generated at: "
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1534 f"{self.output_dir}/comparison_result.html"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1535 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1536
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1537 def save_dashboard(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1538 raise NotImplementedError("Subclasses should implement this method")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1539
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1540 def generate_plots_explainer(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1541 raise NotImplementedError("Subclasses should implement this method")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1542
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1543 def generate_tree_plots(self):
13
bf0df21a1ea3 planemo upload for repository https://github.com/goeckslab/gleam commit 7fc20c9ddc2b641975138c9d67b5da240af0484c
goeckslab
parents: 12
diff changeset
1544 from explainerdashboard.explainers import RandomForestExplainer
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1545 from sklearn.ensemble import (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1546 RandomForestClassifier, RandomForestRegressor
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1547 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1548 from xgboost import XGBClassifier, XGBRegressor
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1549
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1550 LOG.info("Generating tree plots")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1551 X_test = self.exp.X_test_transformed.copy()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1552 y_test = self.exp.y_test_transformed
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1553
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1554 if isinstance(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1555 self.best_model, (RandomForestClassifier, RandomForestRegressor)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
1556 ):
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1557 n_trees = self.best_model.n_estimators
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1558 elif isinstance(self.best_model, (XGBClassifier, XGBRegressor)):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1559 n_trees = len(self.best_model.get_booster().get_dump())
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1560 else:
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1561 LOG.warning("Tree plots not supported for this model type.")
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1562 return
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1563
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1564 explainer = RandomForestExplainer(self.best_model, X_test, y_test)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1565 for i in range(n_trees):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1566 fig = explainer.decisiontree_encoded(tree_idx=i, index=0)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
1567 self.trees.append(fig)
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1568
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1569 def run(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1570 self.load_data()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1571 self.setup_pycaret()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1572 self.train_model()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1573 self.save_model()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1574 self.generate_plots()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1575 self.generate_plots_explainer()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1576 self.generate_tree_plots()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1577 self.save_html_report()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1578 # self.save_dashboard()