annotate image_learner_cli.py @ 10:b0d893d04d4c draft default tip

planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
author goeckslab
date Mon, 08 Sep 2025 22:38:35 +0000
parents 9e912fce264c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1 import argparse
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
2 import json
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
3 import logging
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
4 import os
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
5 import shutil
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
6 import sys
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
7 import tempfile
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
8 import zipfile
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
9 from pathlib import Path
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
10 from typing import Any, Dict, Optional, Protocol, Tuple
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
11
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
12 import numpy as np
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
13 import pandas as pd
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
14 import pandas.api.types as ptypes
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
15 import yaml
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
16 from constants import (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
17 IMAGE_PATH_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
18 LABEL_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
19 METRIC_DISPLAY_NAMES,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
20 MODEL_ENCODER_TEMPLATES,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
21 SPLIT_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
22 TEMP_CONFIG_FILENAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
23 TEMP_CSV_FILENAME,
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
24 TEMP_DIR_PREFIX,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
25 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
26 from ludwig.globals import (
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
27 DESCRIPTION_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
28 PREDICTIONS_PARQUET_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
29 TEST_STATISTICS_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
30 TRAIN_SET_METADATA_FILE_NAME,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
31 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
32 from ludwig.utils.data_utils import get_split_path
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
33 from ludwig.visualize import get_visualizations_registry
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
34 from plotly_plots import build_classification_plots
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
35 from sklearn.model_selection import train_test_split
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
36 from utils import (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
37 build_tabbed_html,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
38 encode_image_to_base64,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
39 get_html_closing,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
40 get_html_template,
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
41 get_metrics_help_modal,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
42 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
43
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
44 # --- Logging Setup ---
# Module-level logging configuration: logging.basicConfig() is called with
# level INFO and a record format of "<asctime> <levelname> <name>: <message>".
# The module then obtains the logger named "ImageLearner" and binds it to
# `logger`.
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
45 logging.basicConfig(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
46 level=logging.INFO,
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
47 format="%(asctime)s %(levelname)s %(name)s: %(message)s",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
48 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
49 logger = logging.getLogger("ImageLearner")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
50
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
51
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
52 def format_config_table_html(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
53 config: dict,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
54 split_info: Optional[str] = None,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
55 training_progress: dict = None,
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
56 output_type: Optional[str] = None,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
57 ) -> str:
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
58 display_keys = [
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
59 "task_type",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
60 "model_name",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
61 "epochs",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
62 "batch_size",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
63 "fine_tune",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
64 "use_pretrained",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
65 "learning_rate",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
66 "random_seed",
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
67 "early_stop",
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
68 "threshold",
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
69 ]
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
70
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
71 rows = []
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
72 for key in display_keys:
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
73 val = config.get(key, None)
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
74 if key == "threshold":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
75 if output_type != "binary":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
76 continue
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
77 val = val if val is not None else 0.5
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
78 val_str = f"{val:.2f}"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
79 if val == 0.5:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
80 val_str += " (default)"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
81 else:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
82 if key == "task_type":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
83 val_str = val.title() if isinstance(val, str) else "N/A"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
84 elif key == "batch_size":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
85 if val is not None:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
86 val_str = int(val)
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
87 else:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
88 if training_progress:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
89 resolved_val = training_progress.get("batch_size")
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
90 val_str = (
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
91 "Auto-selected batch size by Ludwig:<br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
92 f"<span style='font-size: 0.85em;'>{resolved_val}</span><br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
93 )
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
94 else:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
95 val_str = "auto"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
96 elif key == "learning_rate":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
97 if val is not None and val != "auto":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
98 val_str = f"{val:.6f}"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
99 else:
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
100 if training_progress:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
101 resolved_val = training_progress.get("learning_rate")
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
102 val_str = (
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
103 "Auto-selected learning rate by Ludwig:<br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
104 f"<span style='font-size: 0.85em;'>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
105 f"{resolved_val if resolved_val else 'auto'}</span><br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
106 "<span style='font-size: 0.85em;'>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
107 "Based on model architecture and training setup "
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
108 "(e.g., fine-tuning).<br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
109 "</span>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
110 )
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
111 else:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
112 val_str = (
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
113 "Auto-selected by Ludwig<br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
114 "<span style='font-size: 0.85em;'>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
115 "Automatically tuned based on architecture and dataset.<br>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
116 "See <a href='https://ludwig.ai/latest/configuration/trainer/"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
117 "#trainer-parameters' target='_blank'>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
118 "Ludwig Trainer Parameters</a> for details."
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
119 "</span>"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
120 )
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
121 elif key == "epochs":
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
122 if val is None:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
123 val_str = "N/A"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
124 else:
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
125 if (
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
126 training_progress
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
127 and "epoch" in training_progress
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
128 and val > training_progress["epoch"]
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
129 ):
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
130 val_str = (
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
131 f"Because of early stopping: the training "
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
132 f"stopped at epoch {training_progress['epoch']}"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
133 )
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
134 else:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
135 val_str = val
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
136 else:
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
137 val_str = val if val is not None else "N/A"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
138 if val_str == "N/A" and key not in ["task_type"]:
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
139 continue
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
140 rows.append(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
141 f"<tr>"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
142 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
143 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
144 f"{key.replace('_', ' ').title()}</td>"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
145 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
146 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>"
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
147 f"{val_str}</td>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
148 f"</tr>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
149 )
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
150 aug_cfg = config.get("augmentation")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
151 if aug_cfg:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
152 types = [str(a.get("type", "")) for a in aug_cfg]
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
153 aug_val = ", ".join(types)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
154 rows.append(
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
155 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
156 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Augmentation</td>"
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
157 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
158 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{aug_val}</td></tr>"
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
159 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
160 if split_info:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
161 rows.append(
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
162 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
163 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Data Split</td>"
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
164 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; "
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
165 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{split_info}</td></tr>"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
166 )
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
167 html = f"""
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
168 <h2 style="text-align: center;">Model and Training Summary</h2>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
169 <div style="display: flex; justify-content: center;">
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
170 <table style="border-collapse: collapse; width: 100%; table-layout: fixed;">
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
171 <thead><tr>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
172 <th style="padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Parameter</th>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
173 <th style="padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Value</th>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
174 </tr></thead>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
175 <tbody>
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
176 {"".join(rows)}
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
177 </tbody>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
178 </table>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
179 </div><br>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
180 <p style="text-align: center; font-size: 0.9em;">
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
181 Model trained using <a href="https://ludwig.ai/" target="_blank" rel="noopener noreferrer">Ludwig</a>.
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
182 <a href="https://ludwig.ai/latest/configuration/" target="_blank" rel="noopener noreferrer">
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
183 Ludwig documentation provides detailed information about default model and training parameters
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
184 </a>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
185 </p><hr>
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
186 """
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
187 return html
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
188
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
189
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
def detect_output_type(test_stats):
    """Infer the task type from the ``label`` section of the test statistics.

    Returns:
        ``"regression"`` when the label statistics contain a
        ``mean_squared_error`` entry, ``"binary"`` when ``per_class_stats``
        holds exactly two entries, and ``"category"`` in every other case
        (including when no label statistics are present).
    """
    stats_for_label = test_stats.get("label", {})

    # The regression check takes precedence over any per-class information.
    if "mean_squared_error" in stats_for_label:
        return "regression"

    class_count = len(stats_for_label.get("per_class_stats", {}))
    return "binary" if class_count == 2 else "category"
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
199
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
200
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
def extract_metrics_from_json(
    train_stats: dict,
    test_stats: dict,
    output_type: str,
) -> dict:
    """Collect per-split metrics from the training and test statistics.

    Args:
        train_stats: Mapping with optional ``"training"`` and ``"validation"``
            entries, each holding a ``"label"`` mapping of metric name to
            either a list of values or a single number.
        test_stats: Mapping with an optional ``"label"`` mapping (which may
            contain ``"overall_stats"``) and an optional ``"combined"``
            mapping.
        output_type: ``"binary"`` or ``"regression"``; any other value is
            handled as the categorical case.

    Returns:
        A dict with the keys ``"training"``, ``"validation"`` and ``"test"``.
        A split whose statistics are missing keeps an empty dict. For
        training/validation every expected metric name is present, with
        ``None`` where no usable value was found.
    """
    binary_keys = (
        "accuracy",
        "loss",
        "precision",
        "recall",
        "specificity",
        "roc_auc",
    )
    regression_keys = (
        "loss",
        "mean_absolute_error",
        "mean_absolute_percentage_error",
        "mean_squared_error",
        "root_mean_squared_error",
        "root_mean_squared_percentage_error",
        "r2",
    )
    category_keys = (
        "accuracy",
        "accuracy_micro",
        "loss",
        "roc_auc",
        "hits_at_k",
    )

    def get_last_value(stats, key):
        # A plain number is used as-is; a non-empty list contributes its
        # final element; anything else counts as "no value".
        raw = stats.get(key)
        if isinstance(raw, (int, float)):
            return raw
        if isinstance(raw, list) and raw:
            return raw[-1]
        return None

    if output_type == "binary":
        wanted_keys = binary_keys
    elif output_type == "regression":
        wanted_keys = regression_keys
    else:
        wanted_keys = category_keys

    metrics = {"training": {}, "validation": {}, "test": {}}

    for split in ("training", "validation"):
        per_split = train_stats.get(split, {})
        if not per_split:
            logging.warning(f"No statistics found for {split} split")
            continue
        per_label = per_split.get("label", {})
        if not per_label:
            logging.warning(f"No label statistics found for {split} split")
            continue
        metrics[split] = {
            name: get_last_value(per_label, name) for name in wanted_keys
        }

    # Test metrics are gathered dynamically: every scalar entry of the label
    # statistics is kept unless its key is on the skip list.
    test_label_stats = test_stats.get("label", {})
    if not test_label_stats:
        logging.warning("No label statistics found for test split")
        return metrics

    combined_stats = test_stats.get("combined", {})
    overall_stats = test_label_stats.get("overall_stats", {})

    # "overall_stats" is skipped here because it is flattened in separately.
    if output_type == "binary":
        skipped = {
            "per_class_stats",
            "precision_recall_curve",
            "roc_curve",
            "overall_stats",
        }
    else:
        skipped = {"per_class_stats", "confusion_matrix", "overall_stats"}

    test_metrics = {
        name: value
        for name, value in test_label_stats.items()
        if name not in skipped and isinstance(value, (int, float, str, bool))
    }

    # Entries of overall_stats are merged in unfiltered and override any
    # same-named scalar collected above.
    for name, value in overall_stats.items():
        test_metrics[name] = value

    # Fall back to the combined loss only when no loss was found so far.
    if "loss" in combined_stats and "loss" not in test_metrics:
        test_metrics["loss"] = combined_stats["loss"]

    metrics["test"] = test_metrics
    return metrics
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
295
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
296
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
def generate_table_row(cells, styles):
    """Render ``cells`` as a single HTML ``<tr>`` element.

    Each cell becomes one ``<td>`` whose ``style`` attribute is ``styles``.
    Cell values and ``styles`` are interpolated as-is; no HTML escaping is
    applied here.
    """
    rendered_cells = [f"<td style='{styles}'>{cell}</td>" for cell in cells]
    return "<tr>" + "".join(rendered_cells) + "</tr>"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
304
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
305
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
306 # -----------------------------------------
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
307 # 2) MODEL PERFORMANCE (Train/Val/Test) TABLE
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
308 # -----------------------------------------
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
309
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
310
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
def format_stats_table_html(train_stats: dict, test_stats: dict) -> str:
    """Render the combined train/validation/test metric summary as HTML.

    Args:
        train_stats: Training statistics, forwarded to
            ``extract_metrics_from_json``.
        test_stats: Test statistics; used to detect the output type and
            forwarded to ``extract_metrics_from_json``.

    Returns:
        An HTML fragment (heading plus a centered table) with one row per
        metric that has a non-``None`` value in all three splits, ordered
        by metric key. When no metric qualifies, a one-cell placeholder
        table is returned instead.
    """
    output_type = detect_output_type(test_stats)
    all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type)

    rows = []
    for key in sorted(all_metrics["training"]):
        # Only metrics reported for every split can fill a complete row.
        if key not in all_metrics["validation"] or key not in all_metrics["test"]:
            continue
        label = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title())
        values = (
            all_metrics["training"].get(key),
            all_metrics["validation"].get(key),
            all_metrics["test"].get(key),
        )
        if any(value is None for value in values):
            continue
        rows.append([label, *(f"{value:.4f}" for value in values)])

    if not rows:
        return "<table><tr><td>No metric values found.</td></tr></table>"

    # The first column is left-aligned; the numeric columns are centered.
    columns = (
        ("Metric", "left"),
        ("Train", "center"),
        ("Validation", "center"),
        ("Test", "center"),
    )
    header_cells = "".join(
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; "
        f"text-align: {align}; white-space: nowrap;'>{title}</th>"
        for title, align in columns
    )
    cell_style = (
        "padding: 10px; border: 1px solid #ccc; "
        "text-align: center; white-space: nowrap;"
    )

    parts = [
        "<h2 style='text-align: center;'>Model Performance Summary</h2>",
        "<div style='display: flex; justify-content: center;'>",
        "<table class='performance-summary' style='border-collapse: collapse;'>",
        "<thead><tr>",
        header_cells,
        "</tr></thead><tbody>",
    ]
    parts.extend(generate_table_row(row, cell_style) for row in rows)
    parts.append("</tbody></table></div><br>")
    return "".join(parts)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
352
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
353
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
354 # -------------------------------------------
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
355 # 3) TRAIN/VALIDATION PERFORMANCE SUMMARY TABLE
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
356 # -------------------------------------------
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
357
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
358
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str:
    """Render the train/validation metric summary as HTML.

    Args:
        train_stats: Training statistics, forwarded to
            ``extract_metrics_from_json``.
        test_stats: Test statistics; used to detect the output type and
            forwarded to ``extract_metrics_from_json``.

    Returns:
        An HTML fragment (heading plus a centered table) with one row per
        metric that has a non-``None`` value for both training and
        validation, ordered by metric key. When no metric qualifies, a
        one-cell placeholder table is returned instead.
    """
    output_type = detect_output_type(test_stats)
    all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type)

    rows = []
    for key in sorted(all_metrics["training"]):
        # Skip metrics that were never reported for the validation split.
        if key not in all_metrics["validation"]:
            continue
        label = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title())
        train_value = all_metrics["training"].get(key)
        val_value = all_metrics["validation"].get(key)
        if train_value is None or val_value is None:
            continue
        rows.append([label, f"{train_value:.4f}", f"{val_value:.4f}"])

    if not rows:
        return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>"

    # The first column is left-aligned; the numeric columns are centered.
    columns = (
        ("Metric", "left"),
        ("Train", "center"),
        ("Validation", "center"),
    )
    header_cells = "".join(
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; "
        f"text-align: {align}; white-space: nowrap;'>{title}</th>"
        for title, align in columns
    )
    cell_style = (
        "padding: 10px; border: 1px solid #ccc; "
        "text-align: center; white-space: nowrap;"
    )

    parts = [
        "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>",
        "<div style='display: flex; justify-content: center;'>",
        "<table class='performance-summary' style='border-collapse: collapse;'>",
        "<thead><tr>",
        header_cells,
        "</tr></thead><tbody>",
    ]
    parts.extend(generate_table_row(row, cell_style) for row in rows)
    parts.append("</tbody></table></div><br>")
    return "".join(parts)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
395
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
396
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
397 # -----------------------------------------
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
398 # 4) TEST-ONLY PERFORMANCE SUMMARY TABLE
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
399 # -----------------------------------------
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
400
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
401
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
def format_test_merged_stats_table_html(
    test_metrics: Dict[str, Optional[float]],
) -> str:
    """Render the test-only metric summary as HTML.

    Args:
        test_metrics: Mapping of metric key to its test value; entries
            whose value is ``None`` are left out of the table.

    Returns:
        An HTML fragment (heading plus a centered table) with one row per
        metric that has a value, ordered by metric key. When no metric has
        a value, a one-cell placeholder table is returned instead.
    """
    rows = []
    for name in sorted(test_metrics):
        label = METRIC_DISPLAY_NAMES.get(name, name.replace("_", " ").title())
        metric_value = test_metrics[name]
        if metric_value is None:
            continue
        rows.append([label, f"{metric_value:.4f}"])

    if not rows:
        return "<table><tr><td>No test metric values found.</td></tr></table>"

    # The first column is left-aligned; the numeric column is centered.
    columns = (("Metric", "left"), ("Test", "center"))
    header_cells = "".join(
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; "
        f"text-align: {align}; white-space: nowrap;'>{title}</th>"
        for title, align in columns
    )
    cell_style = (
        "padding: 10px; border: 1px solid #ccc; "
        "text-align: center; white-space: nowrap;"
    )

    parts = [
        "<h2 style='text-align: center;'>Test Performance Summary</h2>",
        "<div style='display: flex; justify-content: center;'>",
        "<table class='performance-summary' style='border-collapse: collapse;'>",
        "<thead><tr>",
        header_cells,
        "</tr></thead><tbody>",
    ]
    parts.extend(generate_table_row(row, cell_style) for row in rows)
    parts.append("</tbody></table></div><br>")
    return "".join(parts)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
432
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
433
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
434 def split_data_0_2(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
435 df: pd.DataFrame,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
436 split_column: str,
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
437 validation_size: float = 0.1,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
438 random_state: int = 42,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
439 label_column: Optional[str] = None,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
440 ) -> pd.DataFrame:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
441 """Given a DataFrame whose split_column only contains {0,2}, re-assign a portion of the 0s to become 1s (validation)."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
442 out = df.copy()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
443 out[split_column] = pd.to_numeric(out[split_column], errors="coerce").astype(int)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
444 idx_train = out.index[out[split_column] == 0].tolist()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
445 if not idx_train:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
446 logger.info("No rows with split=0; nothing to do.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
447 return out
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
448 # Always use stratify if possible
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
449 stratify_arr = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
450 if label_column and label_column in out.columns:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
451 label_counts = out.loc[idx_train, label_column].value_counts()
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
452 if label_counts.size > 1:
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
453 # Force stratify even with fewer samples - adjust validation_size if needed
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
454 min_samples_per_class = label_counts.min()
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
455 if min_samples_per_class * validation_size < 1:
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
456 # Adjust validation_size to ensure at least 1 sample per class, but do not exceed original validation_size
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
457 adjusted_validation_size = min(
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
458 validation_size, 1.0 / min_samples_per_class
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
459 )
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
460 if adjusted_validation_size != validation_size:
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
461 validation_size = adjusted_validation_size
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
462 logger.info(
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
463 f"Adjusted validation_size to {validation_size:.3f} to ensure at least one sample per class in validation"
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
464 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
465 stratify_arr = out.loc[idx_train, label_column]
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
466 logger.info("Using stratified split for validation set")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
467 else:
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
468 logger.warning("Only one label class found; cannot stratify")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
469 if validation_size <= 0:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
470 logger.info("validation_size <= 0; keeping all as train.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
471 return out
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
472 if validation_size >= 1:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
473 logger.info("validation_size >= 1; moving all train → validation.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
474 out.loc[idx_train, split_column] = 1
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
475 return out
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
476 # Always try stratified split first
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
477 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
478 train_idx, val_idx = train_test_split(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
479 idx_train,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
480 test_size=validation_size,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
481 random_state=random_state,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
482 stratify=stratify_arr,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
483 )
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
484 logger.info("Successfully applied stratified split")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
485 except ValueError as e:
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
486 logger.warning(f"Stratified split failed ({e}); falling back to random split.")
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
487 train_idx, val_idx = train_test_split(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
488 idx_train,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
489 test_size=validation_size,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
490 random_state=random_state,
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
491 stratify=None,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
492 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
493 out.loc[train_idx, split_column] = 0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
494 out.loc[val_idx, split_column] = 1
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
495 out[split_column] = out[split_column].astype(int)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
496 return out
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
497
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
498
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
def _assign_random_split(
    out: pd.DataFrame,
    split_column: str,
    split_probabilities: list,
    random_state: int,
) -> pd.DataFrame:
    """Assign 0/1/2 split labels to ``out`` by shuffling its index (no stratification).

    The first ``int(n * split_probabilities[0])`` shuffled rows become train (0),
    the next ``int(n * split_probabilities[1])`` become validation (1), and all
    remaining rows become test (2), so rounding leftovers land in the test set.

    Args:
        out: Frame to label; ``split_column`` is written in place.
        split_column: Name of the column receiving the split labels.
        split_probabilities: ``[train, validation, test]`` fractions; only the
            first two are read here.
        random_state: Seed for the shuffle.

    Returns:
        ``out`` with ``split_column`` cast to ``int``.
    """
    indices = out.index.tolist()
    # Seeding the global NumPy RNG is kept on purpose: it makes the shuffle
    # reproducible and preserves the side effect callers had before.
    np.random.seed(random_state)
    np.random.shuffle(indices)
    n_total = len(indices)
    n_train = int(n_total * split_probabilities[0])
    n_val = int(n_total * split_probabilities[1])
    out.loc[indices[:n_train], split_column] = 0
    out.loc[indices[n_train:n_train + n_val], split_column] = 1
    out.loc[indices[n_train + n_val:], split_column] = 2
    return out.astype({split_column: int})


def create_stratified_random_split(
    df: pd.DataFrame,
    split_column: str,
    split_probabilities: Optional[list] = None,
    random_state: int = 42,
    label_column: Optional[str] = None,
) -> pd.DataFrame:
    """Create a stratified random split when no split column exists.

    A copy of ``df`` is returned with ``split_column`` set to 0 (train),
    1 (validation) or 2 (test). The split is stratified on ``label_column``
    when possible; otherwise it falls back to a plain shuffled split.

    Args:
        df: Input frame; it is copied, never modified.
        split_column: Name of the column to create/overwrite with split labels.
        split_probabilities: ``[train, validation, test]`` fractions.
            Defaults to ``[0.7, 0.1, 0.2]`` when ``None``.
        random_state: Seed used for both the stratified and the random split.
        label_column: Column to stratify on. If missing/empty or not present in
            ``df``, a non-stratified random split is used.

    Returns:
        A new DataFrame with an integer ``split_column``.
    """
    # Resolve the default here instead of using a mutable list as the
    # parameter default.
    if split_probabilities is None:
        split_probabilities = [0.7, 0.1, 0.2]

    out = df.copy()
    # initialize split column
    out[split_column] = 0

    if not label_column or label_column not in out.columns:
        logger.warning(
            "No label column found; using random split without stratification"
        )
        return _assign_random_split(
            out, split_column, split_probabilities, random_state
        )

    # Check if stratification is possible: each class must have at least as
    # many samples as the number of splits, so that each split can receive at
    # least one sample per class.
    label_counts = out[label_column].value_counts()
    min_samples_per_class = label_counts.min()
    min_samples_required = len(split_probabilities)
    if min_samples_per_class < min_samples_required:
        logger.warning(
            f"Insufficient samples per class for stratification (min: {min_samples_per_class}, required: {min_samples_required}); using random split"
        )
        return _assign_random_split(
            out, split_column, split_probabilities, random_state
        )

    logger.info("Using stratified random split for train/validation/test sets")
    try:
        # first split: separate test set
        train_val_idx, test_idx = train_test_split(
            out.index.tolist(),
            test_size=split_probabilities[2],
            random_state=random_state,
            stratify=out[label_column],
        )
        # second split: separate training and validation from remaining data.
        # The validation fraction is rescaled because the test rows are gone.
        val_size_adjusted = split_probabilities[1] / (
            split_probabilities[0] + split_probabilities[1]
        )
        train_idx, val_idx = train_test_split(
            train_val_idx,
            test_size=val_size_adjusted,
            random_state=random_state,
            stratify=out.loc[train_val_idx, label_column],
        )
    except ValueError as e:
        # The per-class count check above is not sufficient: train_test_split
        # can still reject the request (e.g. a split that would hold fewer
        # samples than there are classes). Nothing has been written to `out`
        # yet, so degrade to the random split instead of aborting the run.
        logger.warning(f"Stratified split failed ({e}); falling back to random split.")
        return _assign_random_split(
            out, split_column, split_probabilities, random_state
        )

    # assign split values
    out.loc[train_idx, split_column] = 0
    out.loc[val_idx, split_column] = 1
    out.loc[test_idx, split_column] = 2
    logger.info("Successfully applied stratified random split")
    logger.info(
        f"Split counts: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}"
    )
    return out.astype({split_column: int})
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
574
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
575
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
class Backend(Protocol):
    """Interface for a machine learning backend."""

    def prepare_config(
        self, config_params: Dict[str, Any], split_config: Dict[str, Any]
    ) -> str:
        """Turn ``config_params`` and ``split_config`` into a configuration string.

        NOTE(review): the format of the returned string is not fixed by this
        protocol; confirm against the concrete backend.
        """
        ...

    def run_experiment(
        self, dataset_path: Path, config_path: Path, output_dir: Path, random_seed: int
    ) -> None:
        """Run an experiment for ``dataset_path`` using the config at ``config_path``.

        Takes the destination ``output_dir`` and a ``random_seed``; returns nothing.
        """
        ...

    def generate_plots(self, output_dir: Path) -> None:
        """Generate plots for ``output_dir``; returns nothing."""
        ...

    def generate_html_report(
        self, title: str, output_dir: str, config: Dict[str, Any], split_info: str
    ) -> Path:
        """Produce an HTML report and return its ``Path``.

        Note that ``output_dir`` is annotated as ``str`` here, unlike the
        ``Path`` used by the other methods of this protocol.
        """
        ...
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
606
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
607
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
608 class LudwigDirectBackend:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
609 """Backend for running Ludwig experiments directly via the internal experiment_cli function."""
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
610
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
611 def prepare_config(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
612 self,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
613 config_params: Dict[str, Any],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
614 split_config: Dict[str, Any],
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
615 ) -> str:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
616 logger.info("LudwigDirectBackend: Preparing YAML configuration.")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
617
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
618 model_name = config_params.get("model_name", "resnet18")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
619 use_pretrained = config_params.get("use_pretrained", False)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
620 fine_tune = config_params.get("fine_tune", False)
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
621 if use_pretrained:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
622 trainable = bool(fine_tune)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
623 else:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
624 trainable = True
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
625 epochs = config_params.get("epochs", 10)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
626 batch_size = config_params.get("batch_size")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
627 num_processes = config_params.get("preprocessing_num_processes", 1)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
628 early_stop = config_params.get("early_stop", None)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
629 learning_rate = config_params.get("learning_rate")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
630 learning_rate = "auto" if learning_rate is None else float(learning_rate)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
631 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
632 if isinstance(raw_encoder, dict):
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
633 encoder_config = {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
634 **raw_encoder,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
635 "use_pretrained": use_pretrained,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
636 "trainable": trainable,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
637 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
638 else:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
639 encoder_config = {"type": raw_encoder}
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
640
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
641 batch_size_cfg = batch_size or "auto"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
642
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
643 label_column_path = config_params.get("label_column_data_path")
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
644 label_series = None
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
645 if label_column_path is not None and Path(label_column_path).exists():
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
646 try:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
647 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME]
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
648 except Exception as e:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
649 logger.warning(f"Could not read label column for task detection: {e}")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
650
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
651 if (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
652 label_series is not None
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
653 and ptypes.is_numeric_dtype(label_series.dtype)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
654 and label_series.nunique() > 10
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
655 ):
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
656 task_type = "regression"
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
657 else:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
658 task_type = "classification"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
659
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
660 config_params["task_type"] = task_type
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
661
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
662 image_feat: Dict[str, Any] = {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
663 "name": IMAGE_PATH_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
664 "type": "image",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
665 "encoder": encoder_config,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
666 }
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
667 if config_params.get("augmentation") is not None:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
668 image_feat["augmentation"] = config_params["augmentation"]
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
669
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
670 if task_type == "regression":
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
671 output_feat = {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
672 "name": LABEL_COLUMN_NAME,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
673 "type": "number",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
674 "decoder": {"type": "regressor"},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
675 "loss": {"type": "mean_squared_error"},
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
676 "evaluation": {
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
677 "metrics": [
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
678 "mean_squared_error",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
679 "mean_absolute_error",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
680 "r2",
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
681 ]
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
682 },
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
683 }
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
684 val_metric = config_params.get("validation_metric", "mean_squared_error")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
685
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
686 else:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
687 num_unique_labels = (
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
688 label_series.nunique() if label_series is not None else 2
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
689 )
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
690 output_type = "binary" if num_unique_labels == 2 else "category"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
691 output_feat = {"name": LABEL_COLUMN_NAME, "type": output_type}
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
692 if output_type == "binary" and config_params.get("threshold") is not None:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
693 output_feat["threshold"] = float(config_params["threshold"])
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
694 val_metric = None
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
695
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
696 conf: Dict[str, Any] = {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
697 "model_type": "ecd",
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
698 "input_features": [image_feat],
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
699 "output_features": [output_feat],
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
700 "combiner": {"type": "concat"},
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
701 "trainer": {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
702 "epochs": epochs,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
703 "early_stop": early_stop,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
704 "batch_size": batch_size_cfg,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
705 "learning_rate": learning_rate,
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
706 # only set validation_metric for regression
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
707 **({"validation_metric": val_metric} if val_metric else {}),
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
708 },
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
709 "preprocessing": {
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
710 "split": split_config,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
711 "num_processes": num_processes,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
712 "in_memory": False,
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
713 },
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
714 }
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
715
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
716 logger.debug("LudwigDirectBackend: Config dict built.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
717 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
718 yaml_str = yaml.dump(conf, sort_keys=False, indent=2)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
719 logger.info("LudwigDirectBackend: YAML config generated.")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
720 return yaml_str
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
721 except Exception:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
722 logger.error(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
723 "LudwigDirectBackend: Failed to serialize YAML.",
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
724 exc_info=True,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
725 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
726 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
727
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
def run_experiment(
    self,
    dataset_path: Path,
    config_path: Path,
    output_dir: Path,
    random_seed: int = 42,
) -> None:
    """Run a Ludwig experiment through ``ludwig.experiment.experiment_cli``.

    Args:
        dataset_path: Dataset location; handed to Ludwig as a string.
        config_path: Ludwig config location; handed to Ludwig as a string.
        output_dir: Results directory; created (with parents) if missing.
        random_seed: Seed forwarded to ``experiment_cli``.

    Raises:
        RuntimeError: If ``experiment_cli`` cannot be imported, or if the
            call to it raises ``TypeError``.
        Exception: Any other failure during the experiment is logged and
            re-raised unchanged.
    """
    logger.info("LudwigDirectBackend: Starting experiment execution.")

    # Ludwig is imported inside the method; an import failure is logged
    # and wrapped in a RuntimeError.
    try:
        from ludwig.experiment import experiment_cli
    except ImportError as import_err:
        logger.error(
            "LudwigDirectBackend: Could not import experiment_cli.",
            exc_info=True,
        )
        raise RuntimeError("Ludwig import failed.") from import_err

    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        cli_kwargs = {
            "dataset": str(dataset_path),
            "config": str(config_path),
            "output_directory": str(output_dir),
            "random_seed": random_seed,
        }
        experiment_cli(**cli_kwargs)
        logger.info(
            f"LudwigDirectBackend: Experiment completed. Results in {output_dir}"
        )
    except TypeError as type_err:
        # A TypeError is reported as a call-signature mismatch.
        logger.error(
            "LudwigDirectBackend: Argument mismatch in experiment_cli call.",
            exc_info=True,
        )
        raise RuntimeError("Ludwig argument error.") from type_err
    except Exception:
        logger.error(
            "LudwigDirectBackend: Experiment execution error.",
            exc_info=True,
        )
        raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
771
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]:
    """Report training settings recorded by the most recent Ludwig run.

    Looks under ``output_dir`` for entries matching ``experiment_run*``,
    selects the one with the latest modification time, and reads
    ``model/training_progress.json`` inside it.

    Returns:
        A dict holding the ``learning_rate``, ``batch_size`` and ``epoch``
        values from the progress file (``None`` for any key that is
        absent); ``None`` when no run directory or no progress file is
        found; an empty dict when the file cannot be read or parsed.
    """
    root = Path(output_dir)
    runs = list(root.glob("experiment_run*"))
    # Oldest first, so the newest run ends up last.
    runs.sort(key=lambda run: run.stat().st_mtime)

    if len(runs) == 0:
        logger.warning(f"No experiment run directories found in {root}")
        return None

    progress_file = runs[-1] / "model" / "training_progress.json"
    if not progress_file.exists():
        logger.warning(f"No training_progress.json found in {progress_file}")
        return None

    wanted = ("learning_rate", "batch_size", "epoch")
    try:
        with progress_file.open("r", encoding="utf-8") as handle:
            progress = json.load(handle)
        return {field: progress.get(field) for field in wanted}
    except Exception as err:
        # Best effort: a broken progress file yields an empty result.
        logger.warning(f"Failed to read training progress info: {err}")
        return {}
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
800
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
def convert_parquet_to_csv(self, output_dir: Path):
    """Write a CSV copy of the latest run's predictions Parquet file.

    The ``experiment_run*`` entry under *output_dir* with the newest
    modification time is selected, and ``predictions.csv`` is written next
    to its Parquet predictions file. A warning is logged and nothing is
    written when no run directory is found; a read/write failure is logged
    as an error rather than raised.
    """
    root = Path(output_dir)

    # Oldest first, so the final element is the most recently modified run.
    runs = list(root.glob("experiment_run*"))
    runs.sort(key=lambda run: run.stat().st_mtime)
    if not runs:
        logger.warning(f"No experiment run dirs found in {root}")
        return

    latest_run = runs[-1]
    source = latest_run / PREDICTIONS_PARQUET_FILE_NAME
    target = latest_run / "predictions.csv"
    try:
        pd.read_parquet(source).to_csv(target, index=False)
        logger.info(f"Converted Parquet to CSV: {target}")
    except Exception as err:
        # Best-effort step: report the failure and let the caller carry on.
        logger.error(f"Error converting Parquet to CSV: {err}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
820
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
def generate_plots(self, output_dir: Path) -> None:
    """Generate all registered Ludwig visualizations for the latest experiment run.

    The ``experiment_run*`` entry under *output_dir* with the newest
    modification time is used. Visualizations named in ``train_plots`` are
    written to ``visualizations/train`` and those named in ``test_plots`` to
    ``visualizations/test``; any other registered visualization is skipped.
    Each plot is attempted independently: a failure is logged as a warning
    and does not stop the remaining plots.

    Args:
        output_dir: Directory containing the ``experiment_run*`` folders.

    Returns:
        None. A warning is logged and nothing is generated when no run
        directory is found.
    """
    logger.info("Generating all Ludwig visualizations…")

    test_plots = {
        "compare_performance",
        "compare_classifiers_performance_from_prob",
        "compare_classifiers_performance_from_pred",
        "compare_classifiers_performance_changing_k",
        "compare_classifiers_multiclass_multimetric",
        "compare_classifiers_predictions",
        "confidence_thresholding_2thresholds_2d",
        "confidence_thresholding_2thresholds_3d",
        "confidence_thresholding",
        "confidence_thresholding_data_vs_acc",
        "binary_threshold_vs_metric",
        "roc_curves",
        "roc_curves_from_test_statistics",
        "calibration_1_vs_all",
        "calibration_multiclass",
        "confusion_matrix",
        "frequency_vs_f1",
    }
    train_plots = {
        "learning_curves",
        "compare_classifiers_performance_subset",
    }

    output_dir = Path(output_dir)
    exp_dirs = sorted(
        output_dir.glob("experiment_run*"),
        key=lambda p: p.stat().st_mtime,
    )
    if not exp_dirs:
        logger.warning(f"No experiment run dirs found in {output_dir}")
        return
    exp_dir = exp_dirs[-1]

    viz_dir = exp_dir / "visualizations"
    train_viz = viz_dir / "train"
    test_viz = viz_dir / "test"
    # parents=True creates viz_dir as well, so no separate mkdir is needed.
    train_viz.mkdir(parents=True, exist_ok=True)
    test_viz.mkdir(parents=True, exist_ok=True)

    def _check(p: Path) -> Optional[str]:
        """Return *p* as a string when it exists on disk, otherwise None."""
        return str(p) if p.exists() else None

    training_stats = _check(exp_dir / "training_statistics.json")
    test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME)
    probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME)
    gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME)

    # Load the experiment description once. An unreadable or malformed file
    # must not abort plotting, since every plot below is best-effort anyway.
    cfg = {}
    desc = exp_dir / DESCRIPTION_FILE_NAME
    if desc.exists():
        try:
            with open(desc, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            logger.warning(f"Could not read experiment description {desc}: {e}")
        else:
            if isinstance(loaded, dict):
                cfg = loaded

    dataset_path = None
    split_file = None
    dataset = cfg.get("dataset")
    # Only resolve paths for a real, non-empty dataset entry: Path("") is the
    # current directory, which always exists and would otherwise be passed
    # on as the ground-truth file.
    if isinstance(dataset, str) and dataset:
        dataset_path = _check(Path(dataset))
        split_file = _check(Path(get_split_path(dataset)))

    output_feature = ""
    try:
        output_feature = cfg["config"]["output_features"][0]["name"]
    except (KeyError, IndexError, TypeError):
        # Description missing or shaped differently; fall back to the
        # test statistics below.
        logger.debug("No output feature name found in the experiment description")

    if not output_feature and test_stats:
        try:
            with open(test_stats, "r", encoding="utf-8") as f:
                stats = json.load(f)
        except (OSError, ValueError) as e:
            logger.warning(f"Could not read test statistics {test_stats}: {e}")
        else:
            # Use the first top-level key of the statistics as the feature name.
            if isinstance(stats, dict):
                output_feature = next(iter(stats), "")

    viz_registry = get_visualizations_registry()
    for viz_name, viz_func in viz_registry.items():
        if viz_name in train_plots:
            viz_dir_plot = train_viz
        elif viz_name in test_plots:
            viz_dir_plot = test_viz
        else:
            continue

        try:
            viz_func(
                training_statistics=[training_stats] if training_stats else [],
                test_statistics=[test_stats] if test_stats else [],
                probabilities=[probs_path] if probs_path else [],
                output_feature_name=output_feature,
                ground_truth_split=2,
                top_n_classes=[0],
                top_k=3,
                ground_truth_metadata=gt_metadata,
                ground_truth=dataset_path,
                split_file=split_file,
                output_directory=str(viz_dir_plot),
                normalize=False,
                file_format="png",
            )
            logger.info(f"✔ Generated {viz_name}")
        except Exception as e:
            # Deliberate broad catch: one failing visualization (for example
            # missing inputs) is skipped rather than aborting the rest.
            logger.warning(f"✘ Skipped {viz_name}: {e}")

    logger.info(f"All visualizations written to {viz_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
924
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
925 def generate_html_report(
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
926 self,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
927 title: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
928 output_dir: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
929 config: dict,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
930 split_info: str,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
931 ) -> Path:
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
932 """Assemble an HTML report from visualizations under train_val/ and test/ folders."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
933 cwd = Path.cwd()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
934 report_name = title.lower().replace(" ", "_") + "_report.html"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
935 report_path = cwd / report_name
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
936 output_dir = Path(output_dir)
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
937 output_type = None
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
938
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
939 exp_dirs = sorted(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
940 output_dir.glob("experiment_run*"),
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
941 key=lambda p: p.stat().st_mtime,
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
942 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
943 if not exp_dirs:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
944 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
945 exp_dir = exp_dirs[-1]
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
946
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
947 base_viz_dir = exp_dir / "visualizations"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
948 train_viz_dir = base_viz_dir / "train"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
949 test_viz_dir = base_viz_dir / "test"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
950
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
951 html = get_html_template()
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
952
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
953 # Extra CSS & JS: center Plotly and enable CSV download for predictions table
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
954 html += """
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
955 <style>
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
956 /* Center Plotly figures (both wrapper and native classes) */
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
957 .plotly-center { display: flex; justify-content: center; }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
958 .plotly-center .plotly-graph-div, .plotly-center .js-plotly-plot { margin: 0 auto !important; }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
959 .js-plotly-plot, .plotly-graph-div { margin-left: auto !important; margin-right: auto !important; }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
960
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
961 /* Download button for predictions table */
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
962 .download-btn {
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
963 padding: 8px 12px;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
964 border: 1px solid #4CAF50;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
965 background: #4CAF50;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
966 color: white;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
967 border-radius: 6px;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
968 cursor: pointer;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
969 }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
970 .download-btn:hover { filter: brightness(0.95); }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
971 .preds-controls {
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
972 display: flex;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
973 justify-content: flex-end;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
974 gap: 8px;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
975 margin: 8px 0;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
976 }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
977 </style>
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
978 <script>
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
979 function tableToCSV(table){
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
980 const rows = Array.from(table.querySelectorAll('tr'));
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
981 return rows.map(row =>
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
982 Array.from(row.querySelectorAll('th,td')).map(cell => {
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
983 let text = cell.innerText.replace(/\\r?\\n|\\r/g,' ').trim();
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
984 if (text.includes('"') || text.includes(',')) {
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
985 text = '"' + text.replace(/"/g,'""') + '"';
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
986 }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
987 return text;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
988 }).join(',')
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
989 ).join('\\n');
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
990 }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
991 document.addEventListener('DOMContentLoaded', function(){
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
992 const btn = document.getElementById('downloadPredsCsv');
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
993 if(btn){
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
994 btn.addEventListener('click', function(){
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
995 const tbl = document.querySelector('.predictions-table');
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
996 if(!tbl){ alert('Predictions table not found.'); return; }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
997 const csv = tableToCSV(tbl);
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
998 const blob = new Blob([csv], {type: 'text/csv;charset=utf-8;'});
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
999 const url = URL.createObjectURL(blob);
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1000 const a = document.createElement('a');
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1001 a.href = url;
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1002 a.download = 'ground_truth_vs_predictions.csv';
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1003 document.body.appendChild(a);
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1004 a.click();
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1005 document.body.removeChild(a);
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1006 URL.revokeObjectURL(url);
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1007 });
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1008 }
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1009 });
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1010 </script>
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1011 """
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1012 html += f"<h1>{title}</h1>"
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1013
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1014 metrics_html = ""
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1015 train_val_metrics_html = ""
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1016 test_metrics_html = ""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1017 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1018 train_stats_path = exp_dir / "training_statistics.json"
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1019 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1020 if train_stats_path.exists() and test_stats_path.exists():
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1021 with open(train_stats_path) as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1022 train_stats = json.load(f)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1023 with open(test_stats_path) as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1024 test_stats = json.load(f)
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1025 output_type = detect_output_type(test_stats)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1026 metrics_html = format_stats_table_html(train_stats, test_stats)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1027 train_val_metrics_html = format_train_val_stats_table_html(
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1028 train_stats, test_stats
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1029 )
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1030 test_metrics_html = format_test_merged_stats_table_html(
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1031 extract_metrics_from_json(train_stats, test_stats, output_type)[
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1032 "test"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1033 ]
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1034 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1035 except Exception as e:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1036 logger.warning(
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1037 f"Could not load stats for HTML report: {type(e).__name__}: {e}"
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1038 )
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1039
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1040 config_html = ""
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1041 training_progress = self.get_training_process(output_dir)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1042 try:
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1043 config_html = format_config_table_html(
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1044 config, split_info, training_progress, output_type
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1045 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1046 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1047 logger.warning(f"Could not load config for HTML report: {e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1048
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1049 # ---------- image rendering with exclusions ----------
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
def render_img_section(
    title: str,
    dir_path: Path,
    output_type: Optional[str] = None,
    exclude_names: Optional[set] = None,
) -> str:
    """Render every eligible PNG in *dir_path* as inline base64 HTML.

    Args:
        title: Heading used only in the placeholder messages (missing
            directory / no plots). When plots exist, each image gets its
            own heading derived from its file name instead.
        dir_path: Directory searched (non-recursively) for ``*.png`` files.
        output_type: ``"binary"`` or ``"category"`` select a preferred
            display order (and, for ``"category"``, extra exclusions);
            any other value sorts the images by path.
        exclude_names: Extra file names to leave out, in addition to the
            built-in exclusions.

    Returns:
        An HTML fragment: one centered ``<h2>`` + ``<img>`` block per plot,
        or a placeholder paragraph when there is nothing to show.
    """
    if not dir_path.exists():
        return f"<h2>{title}</h2><p><em>Directory not found.</em></p>"

    # Names that are never rendered by this function, plus caller extras.
    skip = {"confusion_matrix.png", "roc_curves.png"} | set(exclude_names or ())

    # Per-output-type configuration: names to drop and the names that
    # should be shown first (in this order) when present.
    unwanted = set()
    priority = []
    if output_type == "binary":
        priority = [
            "roc_curves_from_prediction_statistics.png",
            "compare_performance_label.png",
            "confusion_matrix_entropy__label_top2.png",
        ]
    elif output_type == "category":
        unwanted = {
            "compare_classifiers_multiclass_multimetric__label_best10.png",
            "compare_classifiers_multiclass_multimetric__label_top10.png",
            "compare_classifiers_multiclass_multimetric__label_worst10.png",
        }
        # "roc_curves.png" is not listed here: it is always removed by
        # the built-in exclusions above, so it could never be ordered.
        priority = [
            "compare_performance_label.png",
            "compare_classifiers_performance_from_prob.png",
            "confusion_matrix_entropy__label_top10.png",
        ]

    # Apply ALL exclusions before the emptiness check so that a directory
    # containing only excluded plots yields the placeholder rather than an
    # empty fragment.
    imgs = [
        img
        for img in dir_path.glob("*.png")
        if img.name not in skip
        and img.name not in unwanted
        and not img.name.startswith("confusion_matrix__label_top")
    ]
    if not imgs:
        return f"<h2>{title}</h2><p><em>No plots found.</em></p>"

    # Prioritized plots first (in the configured order), then the rest
    # sorted by path. With an empty priority list this is a plain sort.
    by_name = {img.name: img for img in imgs}
    ordered = [by_name[name] for name in priority if name in by_name]
    others = sorted(img for img in imgs if img.name not in priority)

    parts = []
    for img in ordered + others:
        b64 = encode_image_to_base64(str(img))
        img_title = img.stem.replace("_", " ").title()
        parts.append(
            f"<h2 style='text-align: center;'>{img_title}</h2>"
            f'<div class="plot" style="margin-bottom:20px;text-align:center;">'
            f'<img src="data:image/png;base64,{b64}" '
            f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />'
            f"</div>"
        )
    return "".join(parts)
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1120
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1121 tab1_content = config_html + metrics_html
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1122
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1123 tab2_content = train_val_metrics_html + render_img_section(
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1124 "Training and Validation Visualizations",
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1125 train_viz_dir,
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1126 output_type,
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1127 exclude_names={
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1128 "compare_classifiers_performance_from_prob.png",
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1129 "roc_curves_from_prediction_statistics.png",
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1130 "precision_recall_curves_from_prediction_statistics.png",
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1131 "precision_recall_curve.png",
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1132 },
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1133 )
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1134
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1135 # --- Predictions vs Ground Truth table (REGRESSION ONLY) ---
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1136 preds_section = ""
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1137 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1138 if output_type == "regression" and parquet_path.exists():
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1139 try:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1140 # 1) load predictions from Parquet
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1141 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1142 # assume the column containing your model's prediction is named "prediction"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1143 pred_col = next(
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1144 (c for c in df_preds.columns if "prediction" in c.lower()),
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1145 None,
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1146 )
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1147 if pred_col is None:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1148 raise ValueError("No prediction column found in Parquet output")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1149 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"})
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1150 # 2) load ground truth for the test split from prepared CSV
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1151 df_all = pd.read_csv(config["label_column_data_path"])
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1152 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1153 LABEL_COLUMN_NAME
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1154 ].reset_index(drop=True)
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1155 # 3) concatenate side-by-side
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1156 df_table = pd.concat([df_gt, df_pred], axis=1)
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1157 df_table.columns = [LABEL_COLUMN_NAME, "prediction"]
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1158 # 4) render as HTML
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1159 preds_html = df_table.to_html(index=False, classes="predictions-table")
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1160 preds_section = (
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1161 "<h2 style='text-align: center;'>Ground Truth vs. Predictions</h2>"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1162 "<div class='preds-controls'>"
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1163 "<button id='downloadPredsCsv' class='download-btn'>Download CSV</button>"
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1164 "</div>"
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1165 "<div class='scroll-rows-30' style='overflow-x:auto; overflow-y:auto; max-height:900px; margin-bottom:20px;'>"
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1166 + preds_html
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1167 + "</div>"
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1168 )
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1169 except Exception as e:
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1170 logger.warning(f"Could not build Predictions vs GT table: {e}")
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1171
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1172 tab3_content = test_metrics_html + preds_section
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1173
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1174 # Classification-only interactive Plotly panels (centered)
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1175 if output_type in ("binary", "category"):
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1176 training_stats_path = exp_dir / "training_statistics.json"
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1177 interactive_plots = build_classification_plots(
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1178 str(test_stats_path),
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1179 str(training_stats_path),
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1180 )
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1181 for plot in interactive_plots:
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1182 tab3_content += (
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1183 f"<h2 style='text-align: center;'>{plot['title']}</h2>"
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1184 f"<div class='plotly-center'>{plot['html']}</div>"
8
85e6f4b2ad18 planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents: 7
diff changeset
1185 )
10
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1186
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1187 # Add static TEST PNGs (with default dedupe/exclusions)
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1188 tab3_content += render_img_section(
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1189 "Test Visualizations", test_viz_dir, output_type
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1190 )
b0d893d04d4c planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents: 9
diff changeset
1191
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1192 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content)
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1193 modal_html = get_metrics_help_modal()
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1194 html += tabbed_html + modal_html + get_html_closing()
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1195
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1196 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1197 with open(report_path, "w") as f:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1198 f.write(html)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1199 logger.info(f"HTML report generated at: {report_path}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1200 except Exception as e:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1201 logger.error(f"Failed to write HTML report: {e}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1202 raise
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1203
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1204 return report_path
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1205
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1206
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1207 class WorkflowOrchestrator:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1208 """Manages the image-classification workflow."""
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1209
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
def __init__(self, args: argparse.Namespace, backend: Backend):
    """Keep the parsed CLI arguments and the backend for later workflow steps.

    Args:
        args: Parsed command-line namespace; stored as ``self.args``.
        backend: Backend instance; stored as ``self.backend``.
    """
    # Working directories are not created here; both paths start unset and
    # are filled in by _create_temp_dirs().
    self.temp_dir: Optional[Path] = None
    self.image_extract_dir: Optional[Path] = None
    self.backend = backend
    self.args = args
    backend_name = type(backend).__name__
    logger.info(f"Orchestrator initialized with backend: {backend_name}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1216
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
def _create_temp_dirs(self) -> None:
    """Set up the working directory and its ``images`` subdirectory.

    A uniquely named directory (prefixed with ``TEMP_DIR_PREFIX``) is made
    inside ``self.args.output_dir`` and stored in ``self.temp_dir``. An
    ``images`` directory is then created beneath it and stored in
    ``self.image_extract_dir``.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised
            unchanged.
    """
    try:
        created = tempfile.mkdtemp(dir=self.args.output_dir, prefix=TEMP_DIR_PREFIX)
        self.temp_dir = Path(created)
        # Assign the attribute before mkdir() so it is set even if mkdir fails.
        self.image_extract_dir = self.temp_dir.joinpath("images")
        self.image_extract_dir.mkdir()
        logger.info(f"Created temp directory: {self.temp_dir}")
    except Exception:
        logger.error("Failed to create temporary directories", exc_info=True)
        raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1229
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
def _extract_images(self) -> None:
    """Unpack the image archive into the temp image directory.

    Reads the ZIP at ``self.args.image_zip`` and extracts every member into
    ``self.image_extract_dir``.

    Raises:
        RuntimeError: If ``self.image_extract_dir`` has not been set yet.
        Exception: Any error raised while opening or extracting the archive
            is logged with its traceback and re-raised unchanged.
    """
    target_dir = self.image_extract_dir
    if target_dir is None:
        raise RuntimeError("Temp image directory not initialized.")

    archive_path = self.args.image_zip
    logger.info(f"Extracting images from {archive_path} → {target_dir}")
    try:
        with zipfile.ZipFile(archive_path, mode="r") as archive:
            archive.extractall(target_dir)
        logger.info("Image extraction complete.")
    except Exception:
        logger.error("Error extracting zip file", exc_info=True)
        raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1244
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1245 def _prepare_data(self) -> Tuple[Path, Dict[str, Any], str]:
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1246 """Load CSV, update image paths, handle splits, and write prepared CSV."""
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1247 if not self.temp_dir or not self.image_extract_dir:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1248 raise RuntimeError("Temp dirs not initialized before data prep.")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1249
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1250 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1251 df = pd.read_csv(self.args.csv_file)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1252 logger.info(f"Loaded CSV: {self.args.csv_file}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1253 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1254 logger.error("Error loading CSV file", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1255 raise
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1256
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1257 required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME}
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1258 missing = required - set(df.columns)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1259 if missing:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1260 raise ValueError(f"Missing CSV columns: {', '.join(missing)}")
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1261
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1262 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1263 df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply(
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1264 lambda p: str((self.image_extract_dir / p).resolve())
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1265 )
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1266 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1267 logger.error("Error updating image paths", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1268 raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1269 if SPLIT_COLUMN_NAME in df.columns:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1270 df, split_config, split_info = self._process_fixed_split(df)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1271 else:
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1272 logger.info("No split column; creating stratified random split")
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1273 df = create_stratified_random_split(
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1274 df=df,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1275 split_column=SPLIT_COLUMN_NAME,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1276 split_probabilities=self.args.split_probabilities,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1277 random_state=self.args.random_seed,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1278 label_column=LABEL_COLUMN_NAME,
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1279 )
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1280 split_config = {
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1281 "type": "fixed",
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1282 "column": SPLIT_COLUMN_NAME,
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1283 }
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1284 split_info = (
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1285 f"No split column in CSV. Created stratified random split: "
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
1286 f"{[int(p * 100) for p in self.args.split_probabilities]}% "
7
801a8b6973fb planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents: 6
diff changeset
1287 f"for train/val/test with balanced label distribution."
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1288 )
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1289
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1290 final_csv = self.temp_dir / TEMP_CSV_FILENAME
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1291
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1292 try:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1293 df.to_csv(final_csv, index=False)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1294 logger.info(f"Saved prepared data to {final_csv}")
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1295 except Exception:
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1296 logger.error("Error saving prepared CSV", exc_info=True)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1297 raise
9
9e912fce264c planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents: 8
diff changeset
1298
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1299 return final_csv, split_config, split_info
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1300
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
def _process_fixed_split(
    self, df: pd.DataFrame
) -> Tuple[pd.DataFrame, Dict[str, Any], str]:
    """Process a fixed split column (0=train,1=val,2=test).

    The split column is coerced to nullable integers in place on ``df``;
    values that cannot be parsed as numbers become missing and only trigger
    a warning.

    Args:
        df: DataFrame that contains the ``SPLIT_COLUMN_NAME`` column.

    Returns:
        A tuple ``(df, split_config, split_info)`` where ``split_config`` is
        ``{"type": "fixed", "column": SPLIT_COLUMN_NAME}`` and ``split_info``
        is a human-readable description of how the split was obtained.

    Raises:
        ValueError: If the column holds no usable split value at all, or
            holds values outside ``{0, 1, 2}``.
    """
    logger.info(f"Fixed split column '{SPLIT_COLUMN_NAME}' detected.")
    try:
        col = df[SPLIT_COLUMN_NAME]
        # errors="coerce" maps unparseable entries to NaN; the nullable
        # Int64 dtype keeps those as <NA> instead of failing the cast.
        df[SPLIT_COLUMN_NAME] = pd.to_numeric(col, errors="coerce").astype(
            pd.Int64Dtype()
        )
        if df[SPLIT_COLUMN_NAME].isna().any():
            logger.warning("Split column contains non-numeric/missing values.")

        unique = set(df[SPLIT_COLUMN_NAME].dropna().unique())
        logger.info(f"Unique split values: {unique}")

        # An empty set is a subset of {0, 1, 2}, so without this guard a
        # column made up entirely of unparseable/missing values would be
        # accepted as a valid user-defined split.
        if not unique:
            raise ValueError(
                f"Split column '{SPLIT_COLUMN_NAME}' contains no valid "
                "numeric split values (expected 0, 1 or 2)."
            )

        if unique == {0, 2}:
            # Only train/test were supplied: delegate to split_data_0_2 to
            # derive a validation subset (presumably carved out of the
            # rows labeled 0 — see the split_info text below).
            df = split_data_0_2(
                df,
                SPLIT_COLUMN_NAME,
                validation_size=self.args.validation_size,
                label_column=LABEL_COLUMN_NAME,
                random_state=self.args.random_seed,
            )
            split_info = (
                "Detected a split column (with values 0 and 2) in the input CSV. "
                f"Used this column as a base and reassigned "
                f"{self.args.validation_size * 100:.1f}% "
                "of the training set (originally labeled 0) to validation (labeled 1) using stratified sampling."
            )
            logger.info("Applied custom 0/2 split.")
        elif unique.issubset({0, 1, 2}):
            split_info = "Used user-defined split column from CSV."
            logger.info("Using fixed split as-is.")
        else:
            raise ValueError(f"Unexpected split values: {unique}")

        return df, {"type": "fixed", "column": SPLIT_COLUMN_NAME}, split_info

    except Exception:
        # Log with traceback at this boundary, then let the caller decide.
        logger.error("Error processing fixed split", exc_info=True)
        raise
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1342
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
def _cleanup_temp_dirs(self) -> None:
    """Delete the temporary working directory and forget the stored paths.

    The directory tree is removed only when ``self.temp_dir`` is set and
    exists on disk; removal errors are ignored.
    """
    # NOTE(review): the original indentation was lost in this listing; the
    # two resets below are assumed to run unconditionally — confirm.
    tmp_root = self.temp_dir
    if tmp_root and tmp_root.exists():
        logger.info(f"Cleaning up temp directory: {tmp_root}")
        shutil.rmtree(tmp_root, ignore_errors=True)
    self.temp_dir = None
    self.image_extract_dir = None
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1349
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
def run(self) -> None:
    """Execute the full workflow end-to-end.

    Creates the output directory, then (inside a guarded section) sets up
    the temp dirs, extracts images, prepares the data CSV and split
    settings, writes the backend config file, runs the experiment, and
    finally asks the backend for plots, the HTML report and the
    Parquet-to-CSV conversion. Any exception is logged with its traceback
    and re-raised; temp directories are cleaned up in every case.
    """
    logger.info("Starting workflow...")
    args = self.args
    args.output_dir.mkdir(parents=True, exist_ok=True)

    try:
        self._create_temp_dirs()
        self._extract_images()
        prepared_csv, split_settings, split_summary = self._prepare_data()

        # Fine-tuning implies starting from pretrained weights.
        pretrained = args.use_pretrained or args.fine_tune

        backend_args = {
            "model_name": args.model_name,
            "fine_tune": args.fine_tune,
            "use_pretrained": pretrained,
            "epochs": args.epochs,
            "batch_size": args.batch_size,
            "preprocessing_num_processes": args.preprocessing_num_processes,
            "split_probabilities": args.split_probabilities,
            "learning_rate": args.learning_rate,
            "random_seed": args.random_seed,
            "early_stop": args.early_stop,
            "label_column_data_path": prepared_csv,
            "augmentation": args.augmentation,
            "threshold": args.threshold,
        }
        backend = self.backend
        config_text = backend.prepare_config(backend_args, split_settings)

        # Persist the generated config next to the prepared CSV.
        config_path = self.temp_dir / TEMP_CONFIG_FILENAME
        config_path.write_text(config_text)
        logger.info(f"Wrote backend config: {config_path}")

        backend.run_experiment(
            prepared_csv,
            config_path,
            args.output_dir,
            args.random_seed,
        )
        logger.info("Workflow completed successfully.")

        # Post-processing of the experiment outputs.
        backend.generate_plots(args.output_dir)
        html_report = backend.generate_html_report(
            "Image Classification Results",
            args.output_dir,
            backend_args,
            split_summary,
        )
        logger.info(f"HTML report generated at: {html_report}")
        backend.convert_parquet_to_csv(args.output_dir)
        logger.info("Converted Parquet to CSV.")
    except Exception:
        logger.exception("Workflow execution failed")
        raise
    finally:
        self._cleanup_temp_dirs()
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1405
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1406
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
def parse_learning_rate(s):
    """Convert *s* to a float, or return ``None`` when it is not convertible.

    ``None`` is returned both for values of the wrong type (``TypeError``)
    and for strings that do not spell a number (``ValueError``).
    """
    try:
        rate = float(s)
    except (TypeError, ValueError):
        rate = None
    return rate
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1412
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1413
2
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
def aug_parse(aug_string: str):
    """
    Parse comma-separated augmentation keys into Ludwig augmentation dicts.

    Each token is stripped of surrounding whitespace and looked up in a
    fixed table of supported augmentations. The result preserves the order
    (and any repetition) of the input tokens.

    Args:
        aug_string: Comma-separated augmentation keys, e.g.
            ``"random_rotate, random_blur"``.

    Returns:
        A list with one freshly created dict per token.

    Raises:
        ValueError: On an unknown key (an empty token counts as unknown).
    """
    mapping = {
        "random_horizontal_flip": {"type": "random_horizontal_flip"},
        "random_vertical_flip": {"type": "random_vertical_flip"},
        "random_rotate": {"type": "random_rotate", "degree": 10},
        "random_blur": {"type": "random_blur", "kernel_size": 3},
        "random_brightness": {"type": "random_brightness", "min": 0.5, "max": 2.0},
        "random_contrast": {"type": "random_contrast", "min": 0.5, "max": 2.0},
    }
    aug_list = []
    for tok in aug_string.split(","):
        key = tok.strip()
        if key not in mapping:
            valid = ", ".join(mapping)
            raise ValueError(f"Unknown augmentation '{key}'. Valid choices: {valid}")
        # Append a copy: a repeated key must not yield the same dict object
        # twice, otherwise mutating one entry changes the other and
        # identity-tracking serializers see shared references.
        aug_list.append(dict(mapping[key]))
    return aug_list
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1435
186424a7eca7 planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents: 1
diff changeset
1436
0
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
class SplitProbAction(argparse.Action):
    """Argparse action that validates a ``(train, val, test)`` split triple.

    Intended for an option declared with ``nargs=3`` and ``type=float``.
    The three values must each lie in ``[0.0, 1.0]`` and sum to 1.0
    (within an absolute tolerance of 1e-6). On any violation the parser's
    ``error()`` is invoked, which prints usage and exits with status 2.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` and store them on ``namespace`` under ``self.dest``."""
        train, val, test = values

        # Check the range of every component first: a sum-only check lets
        # through nonsensical triples such as (1.5, -0.3, -0.2), and NaN
        # silently passes the tolerance comparison because every comparison
        # involving NaN is False.
        if not all(0.0 <= part <= 1.0 for part in (train, val, test)):
            parser.error(
                "--split-probabilities values must each be between 0.0 and 1.0; "
                f"got {train:.3f} {val:.3f} {test:.3f}"
            )

        total = train + val + test
        if abs(total - 1.0) > 1e-6:
            parser.error(
                f"--split-probabilities must sum to 1.0; "
                f"got {train:.3f} + {val:.3f} + {test:.3f} = {total:.3f}"
            )
        setattr(namespace, self.dest, values)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1447
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1448
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
def main():
    """Parse the command line, validate it, and run the training workflow.

    Builds the argument parser, checks value ranges and input file
    existence, converts the ``--augmentation`` string into its parsed form
    via ``aug_parse``, then hands the resulting namespace to a
    ``WorkflowOrchestrator`` backed by ``LudwigDirectBackend``.

    Always terminates the process through ``sys.exit``: status 0 on
    success, 1 if the workflow raises an ``Exception``, and 2 (via
    ``parser.error``) on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="Image Classification Learner with Pluggable Backends",
    )
    parser.add_argument(
        "--csv-file",
        required=True,
        type=Path,
        help="Path to the input CSV",
    )
    parser.add_argument(
        "--image-zip",
        required=True,
        type=Path,
        help="Path to the images ZIP",
    )
    parser.add_argument(
        "--model-name",
        required=True,
        choices=MODEL_ENCODER_TEMPLATES.keys(),
        help="Which model template to use",
    )
    parser.add_argument(
        "--use-pretrained",
        action="store_true",
        help="Use pretrained weights for the model",
    )
    parser.add_argument(
        "--fine-tune",
        action="store_true",
        help="Enable fine-tuning",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=10,
        help="Number of training epochs",
    )
    parser.add_argument(
        "--early-stop",
        type=int,
        default=5,
        help="Early stopping patience",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        help="Batch size (None = auto)",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("learner_output"),
        help="Where to write outputs",
    )
    parser.add_argument(
        "--validation-size",
        type=float,
        default=0.15,
        help="Fraction for validation (0.0–1.0)",
    )
    parser.add_argument(
        "--preprocessing-num-processes",
        type=int,
        # os.cpu_count() may return None when the count is undeterminable;
        # fall back to a single CPU instead of raising TypeError on `//`.
        default=max(1, (os.cpu_count() or 1) // 2),
        help="CPU processes for data prep",
    )
    parser.add_argument(
        "--split-probabilities",
        type=float,
        nargs=3,
        metavar=("train", "val", "test"),
        action=SplitProbAction,
        default=[0.7, 0.1, 0.2],
        help=(
            "Random split proportions (e.g., 0.7 0.1 0.2). "
            "Only used if no split column."
        ),
    )
    parser.add_argument(
        "--random-seed",
        type=int,
        default=42,
        help="Random seed used for dataset splitting (default: 42)",
    )
    parser.add_argument(
        "--learning-rate",
        type=parse_learning_rate,
        default=None,
        help="Learning rate. If not provided, Ludwig will auto-select it.",
    )
    parser.add_argument(
        "--augmentation",
        type=str,
        default=None,
        help=(
            "Comma-separated list (in order) of any of: "
            "random_horizontal_flip, random_vertical_flip, random_rotate, "
            "random_blur, random_brightness, random_contrast. "
            "E.g. --augmentation random_horizontal_flip,random_rotate"
        ),
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=None,
        help=(
            "Decision threshold for binary classification (0.0–1.0). "
            "Overrides default 0.5."
        ),
    )
    args = parser.parse_args()

    # Value-range and file-existence checks; parser.error() exits with 2.
    if not 0.0 <= args.validation_size <= 1.0:
        parser.error("validation-size must be between 0.0 and 1.0")
    if args.threshold is not None and not 0.0 <= args.threshold <= 1.0:
        parser.error("threshold must be between 0.0 and 1.0")
    if not args.csv_file.is_file():
        parser.error(f"CSV not found: {args.csv_file}")
    if not args.image_zip.is_file():
        parser.error(f"ZIP not found: {args.image_zip}")
    if args.augmentation is not None:
        # Replace the raw comma-separated string with its parsed form so
        # downstream code receives the structured augmentation list.
        try:
            args.augmentation = aug_parse(args.augmentation)
        except ValueError as e:
            parser.error(str(e))

    backend_instance = LudwigDirectBackend()
    orchestrator = WorkflowOrchestrator(args, backend_instance)

    # Do NOT call sys.exit() from a `finally` clause here: that would
    # replace an in-flight KeyboardInterrupt/SystemExit with exit status 0
    # and report an aborted run as successful.
    try:
        orchestrator.run()
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("Main script failed: %s", e)
        sys.exit(1)
    logger.info("Main script finished successfully.")
    sys.exit(0)
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1586
54b871dfc51e planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff changeset
1587
1
39202fe5cf97 planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
if __name__ == "__main__":
    # Fail fast with an actionable message when Ludwig is not installed.
    # The submodule is imported explicitly so that `ludwig.globals` is
    # guaranteed to be loaded rather than relying on a side effect of the
    # package's __init__; this still binds the top-level name `ludwig`.
    # Only the import is guarded, so unrelated errors are never reported
    # as "Ludwig library not found".
    try:
        import ludwig.globals
    except ImportError:
        logger.error(
            "Ludwig library not found. Please ensure Ludwig is installed "
            "('pip install ludwig[image]')"
        )
        sys.exit(1)
    else:
        logger.debug(f"Found Ludwig version: {ludwig.globals.LUDWIG_VERSION}")

    main()