Mercurial > repos > goeckslab > multimodal_learner
annotate metrics_logic.py @ 0:375c36923da1 draft default tip
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
| author | goeckslab |
|---|---|
| date | Tue, 09 Dec 2025 23:49:47 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
1 from collections import OrderedDict |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
2 from typing import Dict, Optional, Tuple |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
3 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
4 import numpy as np |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
5 import pandas as pd |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
6 from sklearn.metrics import ( |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
7 accuracy_score, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
8 average_precision_score, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
9 cohen_kappa_score, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
10 confusion_matrix, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
11 f1_score, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
12 log_loss, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
13 matthews_corrcoef, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
14 mean_absolute_error, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
15 mean_squared_error, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
16 median_absolute_error, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
17 precision_score, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
18 r2_score, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
19 recall_score, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
20 roc_auc_score, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
21 ) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
22 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
23 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
24 # -------------------- Transparent Metrics (task-aware) -------------------- # |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
25 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
26 def _safe_y_proba_to_array(y_proba) -> Optional[np.ndarray]: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
27 """Convert predictor.predict_proba output (array/DataFrame/dict) to np.ndarray or None.""" |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
28 if y_proba is None: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
29 return None |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
30 if isinstance(y_proba, pd.DataFrame): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
31 return y_proba.values |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
32 if isinstance(y_proba, (list, tuple)): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
33 return np.asarray(y_proba) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
34 if isinstance(y_proba, np.ndarray): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
35 return y_proba |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
36 if isinstance(y_proba, dict): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
37 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
38 return np.vstack([np.asarray(v) for _, v in sorted(y_proba.items())]).T |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
39 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
40 return None |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
41 return None |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
42 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
43 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
44 def _specificity_from_cm(cm: np.ndarray) -> float: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
45 """Specificity (TNR) for binary confusion matrix.""" |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
46 if cm.shape != (2, 2): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
47 return np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
48 tn, fp, fn, tp = cm.ravel() |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
49 denom = (tn + fp) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
50 return float(tn / denom) if denom > 0 else np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
51 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
52 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
53 def _compute_regression_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> "OrderedDict[str, float]": |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
54 mse = mean_squared_error(y_true, y_pred) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
55 rmse = float(np.sqrt(mse)) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
56 mae = mean_absolute_error(y_true, y_pred) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
57 # Avoid division by zero using clip |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
58 mape = float(np.mean(np.abs((y_true - y_pred) / np.clip(np.abs(y_true), 1e-12, None))) * 100.0) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
59 r2 = r2_score(y_true, y_pred) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
60 medae = median_absolute_error(y_true, y_pred) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
61 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
62 metrics = OrderedDict() |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
63 metrics["MSE"] = mse |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
64 metrics["RMSE"] = rmse |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
65 metrics["MAE"] = mae |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
66 metrics["MAPE_%"] = mape |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
67 metrics["R2"] = r2 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
68 metrics["MedianAE"] = medae |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
69 return metrics |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
70 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
71 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
72 def _compute_binary_metrics( |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
73 y_true: pd.Series, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
74 y_pred: pd.Series, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
75 y_proba: Optional[np.ndarray], |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
76 predictor |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
77 ) -> "OrderedDict[str, float]": |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
78 metrics = OrderedDict() |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
79 classes_sorted = np.sort(pd.unique(y_true)) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
80 # Choose the lexicographically larger class as "positive" |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
81 pos_label = classes_sorted[-1] |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
82 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
83 metrics["Accuracy"] = accuracy_score(y_true, y_pred) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
84 metrics["Precision"] = precision_score(y_true, y_pred, pos_label=pos_label, zero_division=0) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
85 metrics["Recall_(Sensitivity/TPR)"] = recall_score(y_true, y_pred, pos_label=pos_label, zero_division=0) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
86 metrics["F1-Score"] = f1_score(y_true, y_pred, pos_label=pos_label, zero_division=0) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
87 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
88 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
89 cm = confusion_matrix(y_true, y_pred, labels=classes_sorted) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
90 metrics["Specificity_(TNR)"] = _specificity_from_cm(cm) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
91 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
92 metrics["Specificity_(TNR)"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
93 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
94 # Probabilistic metrics |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
95 if y_proba is not None: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
96 # pick column of positive class |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
97 if y_proba.ndim == 1: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
98 pos_scores = y_proba |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
99 else: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
100 pos_col_idx = -1 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
101 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
102 if hasattr(predictor, "class_labels") and predictor.class_labels: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
103 pos_col_idx = list(predictor.class_labels).index(pos_label) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
104 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
105 pos_col_idx = -1 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
106 pos_scores = y_proba[:, pos_col_idx] |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
107 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
108 metrics["ROC-AUC"] = roc_auc_score(y_true == pos_label, pos_scores) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
109 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
110 metrics["ROC-AUC"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
111 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
112 metrics["PR-AUC"] = average_precision_score(y_true == pos_label, pos_scores) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
113 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
114 metrics["PR-AUC"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
115 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
116 if y_proba.ndim == 1: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
117 y_proba_ll = np.column_stack([1 - pos_scores, pos_scores]) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
118 else: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
119 y_proba_ll = y_proba |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
120 metrics["LogLoss"] = log_loss(y_true, y_proba_ll, labels=classes_sorted) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
121 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
122 metrics["LogLoss"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
123 else: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
124 metrics["ROC-AUC"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
125 metrics["PR-AUC"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
126 metrics["LogLoss"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
127 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
128 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
129 metrics["MCC"] = matthews_corrcoef(y_true, y_pred) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
130 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
131 metrics["MCC"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
132 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
133 return metrics |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
134 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
135 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
136 def _compute_multiclass_metrics( |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
137 y_true: pd.Series, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
138 y_pred: pd.Series, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
139 y_proba: Optional[np.ndarray] |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
140 ) -> "OrderedDict[str, float]": |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
141 metrics = OrderedDict() |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
142 metrics["Accuracy"] = accuracy_score(y_true, y_pred) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
143 metrics["Macro Precision"] = precision_score(y_true, y_pred, average="macro", zero_division=0) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
144 metrics["Macro Recall"] = recall_score(y_true, y_pred, average="macro", zero_division=0) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
145 metrics["Macro F1"] = f1_score(y_true, y_pred, average="macro", zero_division=0) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
146 metrics["Weighted Precision"] = precision_score(y_true, y_pred, average="weighted", zero_division=0) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
147 metrics["Weighted Recall"] = recall_score(y_true, y_pred, average="weighted", zero_division=0) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
148 metrics["Weighted F1"] = f1_score(y_true, y_pred, average="weighted", zero_division=0) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
149 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
150 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
151 metrics["Cohen_Kappa"] = cohen_kappa_score(y_true, y_pred) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
152 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
153 metrics["Cohen_Kappa"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
154 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
155 metrics["MCC"] = matthews_corrcoef(y_true, y_pred) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
156 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
157 metrics["MCC"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
158 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
159 # Probabilistic metrics |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
160 classes_sorted = np.sort(pd.unique(y_true)) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
161 if y_proba is not None and y_proba.ndim == 2: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
162 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
163 metrics["LogLoss"] = log_loss(y_true, y_proba, labels=classes_sorted) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
164 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
165 metrics["LogLoss"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
166 # Macro ROC-AUC / PR-AUC via OVR |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
167 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
168 class_to_index = {c: i for i, c in enumerate(classes_sorted)} |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
169 y_true_idx = np.vectorize(class_to_index.get)(y_true) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
170 metrics["ROC-AUC_macro"] = roc_auc_score(y_true_idx, y_proba, multi_class="ovr", average="macro") |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
171 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
172 metrics["ROC-AUC_macro"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
173 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
174 Y_true_ind = np.zeros_like(y_proba) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
175 idx_map = {c: i for i, c in enumerate(classes_sorted)} |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
176 Y_true_ind[np.arange(y_proba.shape[0]), np.vectorize(idx_map.get)(y_true)] = 1 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
177 metrics["PR-AUC_macro"] = average_precision_score(Y_true_ind, y_proba, average="macro") |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
178 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
179 metrics["PR-AUC_macro"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
180 else: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
181 metrics["LogLoss"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
182 metrics["ROC-AUC_macro"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
183 metrics["PR-AUC_macro"] = np.nan |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
184 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
185 return metrics |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
186 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
187 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
188 def aggregate_metrics(list_of_dicts): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
189 """Aggregate a list of metrics dicts (per split) into mean/std.""" |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
190 agg_mean = {} |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
191 agg_std = {} |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
192 for split in ("Train", "Validation", "Test", "Test (external)"): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
193 keys = set() |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
194 for m in list_of_dicts: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
195 if isinstance(m, dict) and split in m: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
196 keys.update(m[split].keys()) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
197 if not keys: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
198 continue |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
199 agg_mean[split] = {} |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
200 agg_std[split] = {} |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
201 for k in keys: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
202 vals = [m[split][k] for m in list_of_dicts if split in m and k in m[split]] |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
203 numeric_vals = [] |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
204 for v in vals: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
205 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
206 numeric_vals.append(float(v)) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
207 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
208 pass |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
209 if numeric_vals: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
210 agg_mean[split][k] = float(np.mean(numeric_vals)) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
211 agg_std[split][k] = float(np.std(numeric_vals, ddof=0)) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
212 else: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
213 agg_mean[split][k] = vals[-1] if vals else None |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
214 agg_std[split][k] = None |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
215 return agg_mean, agg_std |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
216 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
217 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
218 def compute_metrics_for_split( |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
219 predictor, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
220 df: pd.DataFrame, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
221 target_col: str, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
222 problem_type: str, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
223 threshold: Optional[float] = None, # <— NEW |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
224 ) -> "OrderedDict[str, float]": |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
225 """Compute transparency metrics for one split (Train/Val/Test) based on task type.""" |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
226 # Prepare inputs |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
227 features = df.drop(columns=[target_col], errors="ignore") |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
228 y_true_series = df[target_col].reset_index(drop=True) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
229 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
230 # Probabilities (if available) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
231 y_proba = None |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
232 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
233 y_proba_raw = predictor.predict_proba(features) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
234 y_proba = _safe_y_proba_to_array(y_proba_raw) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
235 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
236 y_proba = None |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
237 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
238 # Labels (optionally thresholded for binary) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
239 y_pred_series = None |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
240 if problem_type == "binary" and (threshold is not None) and (y_proba is not None): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
241 classes_sorted = np.sort(pd.unique(y_true_series)) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
242 pos_label = classes_sorted[-1] |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
243 neg_label = classes_sorted[0] |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
244 if y_proba.ndim == 1: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
245 pos_scores = y_proba |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
246 else: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
247 pos_col_idx = -1 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
248 try: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
249 if hasattr(predictor, "class_labels") and predictor.class_labels: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
250 pos_col_idx = list(predictor.class_labels).index(pos_label) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
251 except Exception: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
252 pos_col_idx = -1 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
253 pos_scores = y_proba[:, pos_col_idx] |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
254 y_pred_series = pd.Series(np.where(pos_scores >= float(threshold), pos_label, neg_label)).reset_index(drop=True) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
255 else: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
256 # Fall back to model's default label prediction (argmax / 0.5 equivalent) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
257 y_pred_series = pd.Series(predictor.predict(features)).reset_index(drop=True) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
258 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
259 if problem_type == "regression": |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
260 y_true_arr = np.asarray(y_true_series, dtype=float) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
261 y_pred_arr = np.asarray(y_pred_series, dtype=float) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
262 return _compute_regression_metrics(y_true_arr, y_pred_arr) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
263 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
264 if problem_type == "binary": |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
265 return _compute_binary_metrics(y_true_series, y_pred_series, y_proba, predictor) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
266 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
267 # multiclass |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
268 return _compute_multiclass_metrics(y_true_series, y_pred_series, y_proba) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
269 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
270 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
271 def evaluate_all_transparency( |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
272 predictor, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
273 train_df: Optional[pd.DataFrame], |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
274 val_df: Optional[pd.DataFrame], |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
275 test_df: Optional[pd.DataFrame], |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
276 target_col: str, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
277 problem_type: str, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
278 threshold: Optional[float] = None, |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
279 ) -> Tuple[pd.DataFrame, Dict[str, Dict[str, float]]]: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
280 """ |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
281 Evaluate Train/Val/Test with the transparent metrics suite. |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
282 Returns: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
283 - metrics_table: DataFrame with index=Metric, columns subset of [Train, Validation, Test] |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
284 - raw_dict: nested dict {split -> {metric -> value}} |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
285 """ |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
286 split_results: Dict[str, Dict[str, float]] = {} |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
287 splits = [] |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
288 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
289 # IMPORTANT: do NOT apply threshold to Train/Val |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
290 if train_df is not None and len(train_df): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
291 split_results["Train"] = compute_metrics_for_split(predictor, train_df, target_col, problem_type, threshold=None) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
292 splits.append("Train") |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
293 if val_df is not None and len(val_df): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
294 split_results["Validation"] = compute_metrics_for_split(predictor, val_df, target_col, problem_type, threshold=None) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
295 splits.append("Validation") |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
296 if test_df is not None and len(test_df): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
297 split_results["Test"] = compute_metrics_for_split(predictor, test_df, target_col, problem_type, threshold=threshold) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
298 splits.append("Test") |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
299 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
300 # Preserve order from the first split; include any extras from others |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
301 order_source = split_results[splits[0]] if splits else {} |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
302 all_metrics = list(order_source.keys()) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
303 for s in splits[1:]: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
304 for m in split_results[s].keys(): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
305 if m not in all_metrics: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
306 all_metrics.append(m) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
307 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
308 metrics_table = pd.DataFrame(index=all_metrics, columns=splits, dtype=float) |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
309 for s in splits: |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
310 for m, v in split_results[s].items(): |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
311 metrics_table.loc[m, s] = v |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
312 |
|
375c36923da1
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
goeckslab
parents:
diff
changeset
|
313 return metrics_table, split_results |
