annotate metrics_logic.py @ 9:3719606b94af draft default tip

planemo upload for repository https://github.com/goeckslab/gleam.git commit fe979d767542b1f109ccb2b74a5a82b04026bf8c
author goeckslab
date Tue, 03 Feb 2026 02:38:43 +0000
parents a48e750cfd25
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
1 from collections import OrderedDict
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
2 from typing import Dict, Optional, Tuple
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
3
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
4 import numpy as np
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
5 import pandas as pd
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
6 from sklearn.metrics import (
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
7 accuracy_score,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
8 average_precision_score,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
9 cohen_kappa_score,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
10 confusion_matrix,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
11 f1_score,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
12 log_loss,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
13 matthews_corrcoef,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
14 mean_absolute_error,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
15 mean_squared_error,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
16 median_absolute_error,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
17 precision_score,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
18 r2_score,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
19 recall_score,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
20 roc_auc_score,
8
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
21 roc_curve
0
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
22 )
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
23
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
24
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
25 # -------------------- Transparent Metrics (task-aware) -------------------- #
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
26
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
def _safe_y_proba_to_array(y_proba) -> Optional[np.ndarray]:
    """Convert predictor.predict_proba output to an np.ndarray, or None.

    Supported inputs are ``None``, ``np.ndarray``, ``pd.DataFrame``,
    ``pd.Series``, ``list``/``tuple`` and ``dict``.

    Args:
        y_proba: Probability output in any of the supported containers. A dict
            is read as ``{class_label: per-row values}``; its entries are
            stacked as columns in sorted-key order.

    Returns:
        The probabilities as an array, or ``None`` when the input is ``None``,
        of an unsupported type, or a dict that cannot be stacked.
    """
    if y_proba is None:
        return None
    if isinstance(y_proba, np.ndarray):
        return y_proba
    # A Series is the 1-D counterpart of a DataFrame; convert it as well
    # instead of silently discarding the scores.
    if isinstance(y_proba, (pd.DataFrame, pd.Series)):
        return y_proba.values
    if isinstance(y_proba, (list, tuple)):
        return np.asarray(y_proba)
    if isinstance(y_proba, dict):
        try:
            # One row per key (sorted), then transpose so keys become columns.
            rows = [np.asarray(y_proba[key]) for key in sorted(y_proba)]
            return np.vstack(rows).T
        except (TypeError, ValueError):
            # Unorderable keys, an empty dict, or ragged values: best effort.
            return None
    return None
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
43
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
44
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
def _specificity_from_cm(cm: np.ndarray) -> float:
    """Return specificity (true-negative rate) from a 2x2 confusion matrix.

    The matrix is read as ``[[tn, fp], [fn, tp]]``. NaN is returned when the
    matrix is not 2x2 or when the first row (tn + fp) does not sum to a
    positive number.
    """
    if cm.shape == (2, 2):
        true_neg = cm[0, 0]
        actual_neg = true_neg + cm[0, 1]
        if actual_neg > 0:
            return float(true_neg / actual_neg)
    return np.nan
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
52
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
53
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
def _compute_regression_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> "OrderedDict[str, float]":
    """Compute regression metrics for true versus predicted values.

    Returns an OrderedDict with the keys, in order: ``MSE``, ``RMSE``, ``MAE``,
    ``MAPE_%``, ``R2`` and ``MedianAE``.
    """
    squared_err = mean_squared_error(y_true, y_pred)
    root_squared_err = float(np.sqrt(squared_err))
    abs_err = mean_absolute_error(y_true, y_pred)

    # Floor |y_true| at 1e-12 so zero targets cannot cause a division by zero.
    safe_scale = np.clip(np.abs(y_true), 1e-12, None)
    relative_err = np.abs((y_true - y_pred) / safe_scale)
    pct_err = float(np.mean(relative_err) * 100.0)

    r_squared = r2_score(y_true, y_pred)
    median_abs_err = median_absolute_error(y_true, y_pred)

    return OrderedDict(
        [
            ("MSE", squared_err),
            ("RMSE", root_squared_err),
            ("MAE", abs_err),
            ("MAPE_%", pct_err),
            ("R2", r_squared),
            ("MedianAE", median_abs_err),
        ]
    )
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
71
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
72
8
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
def _get_binary_scores(
    y_true: pd.Series,
    y_proba: Optional[np.ndarray],
    predictor,
) -> Tuple[np.ndarray, object, Optional[np.ndarray]]:
    """Resolve the sorted classes, the positive label and its score vector.

    The positive label is the last entry of the sorted unique values of
    ``y_true``.

    Args:
        y_true: Ground-truth labels.
        y_proba: Probability output; ``None``, a 1-D array (used as-is as the
            positive-class scores) or a 2-D array with one column per class.
        predictor: Object whose optional ``class_labels`` attribute lists the
            labels in the column order of ``y_proba``.

    Returns:
        ``(classes_sorted, pos_label, pos_scores)``; ``pos_scores`` is ``None``
        when ``y_proba`` is ``None``.
    """
    classes_sorted = np.sort(pd.unique(y_true))
    pos_label = classes_sorted[-1]

    if y_proba is None:
        return classes_sorted, pos_label, None
    if y_proba.ndim == 1:
        return classes_sorted, pos_label, y_proba

    # Default to the last column when the predictor cannot tell us which
    # column belongs to the positive label.
    pos_col_idx = -1
    try:
        class_labels = getattr(predictor, "class_labels", None)
        # Explicit None/len checks: truth-testing a NumPy array or pandas
        # Index raises ValueError, which previously forced the fallback even
        # though the labels were available.
        if class_labels is not None and len(class_labels) > 0:
            pos_col_idx = list(class_labels).index(pos_label)
    except Exception:
        # Best effort: label not listed, or class_labels is not list-like.
        pos_col_idx = -1

    return classes_sorted, pos_label, y_proba[:, pos_col_idx]
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
93
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
94
0
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
def _compute_binary_metrics(
    y_true: pd.Series,
    y_pred: pd.Series,
    y_proba: Optional[np.ndarray],
    predictor,
    classes_sorted: Optional[np.ndarray] = None,
    pos_label: Optional[object] = None,
    pos_scores: Optional[np.ndarray] = None,
) -> "OrderedDict[str, float]":
    """Compute binary-classification metrics.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        y_proba: Probability output (1-D or 2-D array) or ``None``.
        predictor: Passed through to ``_get_binary_scores`` when values have
            to be derived.
        classes_sorted: Optional precomputed class labels.
        pos_label: Optional precomputed positive label.
        pos_scores: Optional precomputed positive-class scores.

    Returns:
        An OrderedDict with the keys, in order: ``Accuracy``, ``Precision``,
        ``Recall_(Sensitivity/TPR)``, ``F1-Score``, ``Specificity_(TNR)``,
        ``ROC-AUC``, ``PR-AUC``, ``LogLoss`` and ``MCC``. A metric that cannot
        be computed is reported as NaN rather than raising.
    """
    metrics = OrderedDict()

    # Without y_proba no scores can be derived, so a missing pos_scores alone
    # must not trigger a recompute: that would overwrite the caller-supplied
    # classes_sorted / pos_label with values re-derived from y_true.
    scores_derivable = pos_scores is None and y_proba is not None
    if classes_sorted is None or pos_label is None or scores_derivable:
        classes_sorted, pos_label, pos_scores = _get_binary_scores(y_true, y_proba, predictor)

    metrics["Accuracy"] = accuracy_score(y_true, y_pred)
    metrics["Precision"] = precision_score(y_true, y_pred, pos_label=pos_label, zero_division=0)
    metrics["Recall_(Sensitivity/TPR)"] = recall_score(y_true, y_pred, pos_label=pos_label, zero_division=0)
    metrics["F1-Score"] = f1_score(y_true, y_pred, pos_label=pos_label, zero_division=0)

    try:
        cm = confusion_matrix(y_true, y_pred, labels=classes_sorted)
        metrics["Specificity_(TNR)"] = _specificity_from_cm(cm)
    except Exception:
        metrics["Specificity_(TNR)"] = np.nan

    # Probabilistic metrics
    if y_proba is not None and pos_scores is not None:
        # Binarise the target against pos_label once for both ranking metrics.
        is_positive = y_true == pos_label
        try:
            metrics["ROC-AUC"] = roc_auc_score(is_positive, pos_scores)
        except Exception:
            metrics["ROC-AUC"] = np.nan
        try:
            metrics["PR-AUC"] = average_precision_score(is_positive, pos_scores)
        except Exception:
            metrics["PR-AUC"] = np.nan
        try:
            if y_proba.ndim == 1:
                # Expand 1-D positive scores into [negative, positive] columns.
                y_proba_ll = np.column_stack([1 - pos_scores, pos_scores])
            else:
                # NOTE(review): the 2-D array is passed through unchanged with
                # labels=classes_sorted; confirm its columns follow that order.
                y_proba_ll = y_proba
            metrics["LogLoss"] = log_loss(y_true, y_proba_ll, labels=classes_sorted)
        except Exception:
            metrics["LogLoss"] = np.nan
    else:
        metrics["ROC-AUC"] = np.nan
        metrics["PR-AUC"] = np.nan
        metrics["LogLoss"] = np.nan

    try:
        metrics["MCC"] = matthews_corrcoef(y_true, y_pred)
    except Exception:
        metrics["MCC"] = np.nan

    return metrics
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
148
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
149
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
150 def _compute_multiclass_metrics(
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
151 y_true: pd.Series,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
152 y_pred: pd.Series,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
153 y_proba: Optional[np.ndarray]
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
154 ) -> "OrderedDict[str, float]":
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
155 metrics = OrderedDict()
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
156 metrics["Accuracy"] = accuracy_score(y_true, y_pred)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
157 metrics["Macro Precision"] = precision_score(y_true, y_pred, average="macro", zero_division=0)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
158 metrics["Macro Recall"] = recall_score(y_true, y_pred, average="macro", zero_division=0)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
159 metrics["Macro F1"] = f1_score(y_true, y_pred, average="macro", zero_division=0)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
160 metrics["Weighted Precision"] = precision_score(y_true, y_pred, average="weighted", zero_division=0)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
161 metrics["Weighted Recall"] = recall_score(y_true, y_pred, average="weighted", zero_division=0)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
162 metrics["Weighted F1"] = f1_score(y_true, y_pred, average="weighted", zero_division=0)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
163
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
164 try:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
165 metrics["Cohen_Kappa"] = cohen_kappa_score(y_true, y_pred)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
166 except Exception:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
167 metrics["Cohen_Kappa"] = np.nan
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
168 try:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
169 metrics["MCC"] = matthews_corrcoef(y_true, y_pred)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
170 except Exception:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
171 metrics["MCC"] = np.nan
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
172
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
173 # Probabilistic metrics
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
174 classes_sorted = np.sort(pd.unique(y_true))
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
175 if y_proba is not None and y_proba.ndim == 2:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
176 try:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
177 metrics["LogLoss"] = log_loss(y_true, y_proba, labels=classes_sorted)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
178 except Exception:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
179 metrics["LogLoss"] = np.nan
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
180 # Macro ROC-AUC / PR-AUC via OVR
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
181 try:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
182 class_to_index = {c: i for i, c in enumerate(classes_sorted)}
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
183 y_true_idx = np.vectorize(class_to_index.get)(y_true)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
184 metrics["ROC-AUC_macro"] = roc_auc_score(y_true_idx, y_proba, multi_class="ovr", average="macro")
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
185 except Exception:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
186 metrics["ROC-AUC_macro"] = np.nan
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
187 try:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
188 Y_true_ind = np.zeros_like(y_proba)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
189 idx_map = {c: i for i, c in enumerate(classes_sorted)}
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
190 Y_true_ind[np.arange(y_proba.shape[0]), np.vectorize(idx_map.get)(y_true)] = 1
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
191 metrics["PR-AUC_macro"] = average_precision_score(Y_true_ind, y_proba, average="macro")
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
192 except Exception:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
193 metrics["PR-AUC_macro"] = np.nan
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
194 else:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
195 metrics["LogLoss"] = np.nan
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
196 metrics["ROC-AUC_macro"] = np.nan
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
197 metrics["PR-AUC_macro"] = np.nan
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
198
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
199 return metrics
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
200
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
201
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
def aggregate_metrics(list_of_dicts):
    """Aggregate a list of metrics dicts (per split) into mean/std.

    Each element of ``list_of_dicts`` is expected to be a dict mapping a split
    name to a ``{metric_name: value}`` dict. Only the splits "Train",
    "Validation", "Test" and "Test (external)" are aggregated. Elements that
    are not dicts (e.g. ``None`` for a failed run) are ignored.

    Args:
        list_of_dicts: iterable of per-run results, or ``None``.

    Returns:
        A tuple ``(agg_mean, agg_std)`` of ``{split: {metric: value}}`` dicts.
        For a metric with at least one value convertible by ``float()``,
        the mean and the population standard deviation (``ddof=0``) of the
        convertible values are stored. Otherwise ``agg_mean`` holds the last
        raw value seen and ``agg_std`` holds ``None``. Splits that appear in
        no run are omitted. Metrics keep the order in which they were first
        encountered.
    """
    agg_mean = {}
    agg_std = {}
    if list_of_dicts is None:
        return agg_mean, agg_std

    # Filter once so key discovery and value collection skip the same entries;
    # previously a non-dict entry was skipped in one place and crashed the other.
    runs = [m for m in list_of_dicts if isinstance(m, dict)]

    for split in ("Train", "Validation", "Test", "Test (external)"):
        per_run = [m[split] for m in runs if split in m]

        # dict.fromkeys de-duplicates while preserving first-seen order, so the
        # output ordering is deterministic (a set's order is not).
        keys = dict.fromkeys(k for split_metrics in per_run for k in split_metrics.keys())
        if not keys:
            continue

        agg_mean[split] = {}
        agg_std[split] = {}
        for k in keys:
            vals = [split_metrics[k] for split_metrics in per_run if k in split_metrics]

            numeric_vals = []
            for v in vals:
                try:
                    numeric_vals.append(float(v))
                except (TypeError, ValueError, OverflowError):
                    # Non-numeric entries (None, labels, containers) are
                    # deliberately left out of the statistics.
                    continue

            if numeric_vals:
                agg_mean[split][k] = float(np.mean(numeric_vals))
                agg_std[split][k] = float(np.std(numeric_vals, ddof=0))
            else:
                # Nothing numeric to average: pass the last raw value through.
                agg_mean[split][k] = vals[-1] if vals else None
                agg_std[split][k] = None
    return agg_mean, agg_std
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
230
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
231
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
def compute_metrics_for_split(
    predictor,
    df: pd.DataFrame,
    target_col: str,
    problem_type: str,
    threshold: Optional[float] = None,
    return_curve: bool = False,
) -> "OrderedDict[str, float] | Tuple[OrderedDict[str, float], Optional[dict]]":
    """Compute transparency metrics for one split (Train/Val/Test) based on task type.

    Args:
        predictor: object exposing ``predict(features)`` and, for
            classification, ``predict_proba(features)``.
        df: split data; must contain ``target_col``. All other columns are
            passed to the predictor as features.
        target_col: name of the label/target column in ``df``.
        problem_type: ``"regression"`` or ``"binary"``; any other value is
            handled as multiclass.
        threshold: optional decision threshold. Used only for binary problems
            and only when positive-class scores are available; otherwise the
            predictor's own ``predict`` output is used.
        return_curve: when True, return ``(metrics, roc_curve_data)`` instead
            of just ``metrics``.

    Returns:
        The metrics mapping, or ``(metrics, roc_curve_data)`` when
        ``return_curve`` is True. ``roc_curve_data`` is a dict with list
        values under ``"fpr"``, ``"tpr"`` and ``"thresholds"`` for binary
        problems when scores are available and the curve can be computed;
        it is ``None`` in every other case.
    """
    # Prepare inputs; the positional index is reset so labels align with the
    # freshly indexed prediction series built below.
    features = df.drop(columns=[target_col], errors="ignore")
    y_true_series = df[target_col].reset_index(drop=True)

    if problem_type == "regression":
        # Probabilities are never used for regression, so predict_proba is
        # not called at all on this path.
        y_pred_series = pd.Series(predictor.predict(features)).reset_index(drop=True)
        y_true_arr = np.asarray(y_true_series, dtype=float)
        y_pred_arr = np.asarray(y_pred_series, dtype=float)
        metrics = _compute_regression_metrics(y_true_arr, y_pred_arr)
        return (metrics, None) if return_curve else metrics

    # Probabilities (if available). Best effort: any failure simply disables
    # the probability-based metrics downstream.
    try:
        y_proba = _safe_y_proba_to_array(predictor.predict_proba(features))
    except Exception:
        y_proba = None

    if problem_type == "binary":
        classes_sorted, pos_label, pos_scores = _get_binary_scores(y_true_series, y_proba, predictor)

        # Labels (optionally thresholded)
        if threshold is not None and pos_scores is not None:
            # The negative label is the first class that differs from the
            # positive one. Falls back to classes_sorted[0] (the previous
            # behaviour) when no such class exists.
            # NOTE(review): assumes classes_sorted is an indexable sequence of
            # labels whenever pos_scores is not None - confirm in
            # _get_binary_scores.
            neg_label = next(
                (c for c in classes_sorted if c != pos_label),
                classes_sorted[0],
            )
            y_pred_series = pd.Series(
                np.where(pos_scores >= float(threshold), pos_label, neg_label)
            ).reset_index(drop=True)
        else:
            # Fall back to model's default label prediction
            y_pred_series = pd.Series(predictor.predict(features)).reset_index(drop=True)

        metrics = _compute_binary_metrics(
            y_true_series,
            y_pred_series,
            y_proba,
            predictor,
            classes_sorted=classes_sorted,
            pos_label=pos_label,
            pos_scores=pos_scores,
        )

        roc_curve_data = None
        if return_curve and pos_scores is not None and pos_label is not None:
            try:
                fpr, tpr, thresholds = roc_curve(y_true_series == pos_label, pos_scores)
                roc_curve_data = {
                    "fpr": fpr.tolist(),
                    "tpr": tpr.tolist(),
                    "thresholds": thresholds.tolist(),
                }
            except Exception:
                # Curve data is optional; metrics are still returned.
                roc_curve_data = None
        return (metrics, roc_curve_data) if return_curve else metrics

    # multiclass
    y_pred_series = pd.Series(predictor.predict(features)).reset_index(drop=True)
    metrics = _compute_multiclass_metrics(y_true_series, y_pred_series, y_proba)
    return (metrics, None) if return_curve else metrics
0
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
298
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
299
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
300 def evaluate_all_transparency(
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
301 predictor,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
302 train_df: Optional[pd.DataFrame],
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
303 val_df: Optional[pd.DataFrame],
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
304 test_df: Optional[pd.DataFrame],
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
305 target_col: str,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
306 problem_type: str,
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
307 threshold: Optional[float] = None,
8
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
308 ) -> Tuple[pd.DataFrame, Dict[str, Dict[str, float]], Dict[str, dict]]:
0
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
309 """
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
310 Evaluate Train/Val/Test with the transparent metrics suite.
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
311 Returns:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
312 - metrics_table: DataFrame with index=Metric, columns subset of [Train, Validation, Test]
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
313 - raw_dict: nested dict {split -> {metric -> value}}
8
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
314 - roc_curves: nested dict {split -> {fpr, tpr, thresholds}} (binary only)
0
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
315 """
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
316 split_results: Dict[str, Dict[str, float]] = {}
8
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
317 roc_curves: Dict[str, dict] = {}
0
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
318 splits = []
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
319
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
320 # IMPORTANT: do NOT apply threshold to Train/Val
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
321 if train_df is not None and len(train_df):
8
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
322 train_metrics, train_curve = compute_metrics_for_split(
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
323 predictor,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
324 train_df,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
325 target_col,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
326 problem_type,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
327 threshold=None,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
328 return_curve=True,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
329 )
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
330 split_results["Train"] = train_metrics
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
331 if train_curve:
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
332 roc_curves["Train"] = train_curve
0
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
333 splits.append("Train")
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
334 if val_df is not None and len(val_df):
8
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
335 val_metrics, val_curve = compute_metrics_for_split(
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
336 predictor,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
337 val_df,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
338 target_col,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
339 problem_type,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
340 threshold=None,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
341 return_curve=True,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
342 )
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
343 split_results["Validation"] = val_metrics
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
344 if val_curve:
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
345 roc_curves["Validation"] = val_curve
0
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
346 splits.append("Validation")
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
347 if test_df is not None and len(test_df):
8
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
348 test_metrics, test_curve = compute_metrics_for_split(
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
349 predictor,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
350 test_df,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
351 target_col,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
352 problem_type,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
353 threshold=threshold,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
354 return_curve=True,
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
355 )
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
356 split_results["Test"] = test_metrics
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
357 if test_curve:
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
358 roc_curves["Test"] = test_curve
0
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
359 splits.append("Test")
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
360
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
361 # Preserve order from the first split; include any extras from others
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
362 order_source = split_results[splits[0]] if splits else {}
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
363 all_metrics = list(order_source.keys())
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
364 for s in splits[1:]:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
365 for m in split_results[s].keys():
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
366 if m not in all_metrics:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
367 all_metrics.append(m)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
368
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
369 metrics_table = pd.DataFrame(index=all_metrics, columns=splits, dtype=float)
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
370 for s in splits:
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
371 for m, v in split_results[s].items():
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
372 metrics_table.loc[m, s] = v
375c36923da1 planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff changeset
373
8
a48e750cfd25 planemo upload for repository https://github.com/goeckslab/gleam.git commit c8a7fef0c54c269afd6c6bdf035af1a7574d11cb
goeckslab
parents: 0
diff changeset
374 return metrics_table, split_results, roc_curves