Mercurial > repos > goeckslab > image_learner
annotate image_learner_cli.py @ 10:b0d893d04d4c draft default tip
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
author | goeckslab |
---|---|
date | Mon, 08 Sep 2025 22:38:35 +0000 |
parents | 9e912fce264c |
children |
rev | line source |
---|---|
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1 import argparse |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
2 import json |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
3 import logging |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
4 import os |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
5 import shutil |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
6 import sys |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
7 import tempfile |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
8 import zipfile |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
9 from pathlib import Path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
10 from typing import Any, Dict, Optional, Protocol, Tuple |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
11 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
12 import numpy as np |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
13 import pandas as pd |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
14 import pandas.api.types as ptypes |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
15 import yaml |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
16 from constants import ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
17 IMAGE_PATH_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
18 LABEL_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
19 METRIC_DISPLAY_NAMES, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
20 MODEL_ENCODER_TEMPLATES, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
21 SPLIT_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
22 TEMP_CONFIG_FILENAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
23 TEMP_CSV_FILENAME, |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
24 TEMP_DIR_PREFIX, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
25 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
26 from ludwig.globals import ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
27 DESCRIPTION_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
28 PREDICTIONS_PARQUET_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
29 TEST_STATISTICS_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
30 TRAIN_SET_METADATA_FILE_NAME, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
31 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
32 from ludwig.utils.data_utils import get_split_path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
33 from ludwig.visualize import get_visualizations_registry |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
34 from plotly_plots import build_classification_plots |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
35 from sklearn.model_selection import train_test_split |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
36 from utils import ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
37 build_tabbed_html, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
38 encode_image_to_base64, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
39 get_html_closing, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
40 get_html_template, |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
41 get_metrics_help_modal, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
42 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
43 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
44 # --- Logging Setup --- |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
45 logging.basicConfig( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
46 level=logging.INFO, |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
47 format="%(asctime)s %(levelname)s %(name)s: %(message)s", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
48 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
49 logger = logging.getLogger("ImageLearner") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
50 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
51 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
52 def format_config_table_html( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
53 config: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
54 split_info: Optional[str] = None, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
55 training_progress: dict = None, |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
56 output_type: Optional[str] = None, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
57 ) -> str: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
58 display_keys = [ |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
59 "task_type", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
60 "model_name", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
61 "epochs", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
62 "batch_size", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
63 "fine_tune", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
64 "use_pretrained", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
65 "learning_rate", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
66 "random_seed", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
67 "early_stop", |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
68 "threshold", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
69 ] |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
70 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
71 rows = [] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
72 for key in display_keys: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
73 val = config.get(key, None) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
74 if key == "threshold": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
75 if output_type != "binary": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
76 continue |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
77 val = val if val is not None else 0.5 |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
78 val_str = f"{val:.2f}" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
79 if val == 0.5: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
80 val_str += " (default)" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
81 else: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
82 if key == "task_type": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
83 val_str = val.title() if isinstance(val, str) else "N/A" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
84 elif key == "batch_size": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
85 if val is not None: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
86 val_str = int(val) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
87 else: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
88 if training_progress: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
89 resolved_val = training_progress.get("batch_size") |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
90 val_str = ( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
91 "Auto-selected batch size by Ludwig:<br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
92 f"<span style='font-size: 0.85em;'>{resolved_val}</span><br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
93 ) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
94 else: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
95 val_str = "auto" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
96 elif key == "learning_rate": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
97 if val is not None and val != "auto": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
98 val_str = f"{val:.6f}" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
99 else: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
100 if training_progress: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
101 resolved_val = training_progress.get("learning_rate") |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
102 val_str = ( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
103 "Auto-selected learning rate by Ludwig:<br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
104 f"<span style='font-size: 0.85em;'>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
105 f"{resolved_val if resolved_val else 'auto'}</span><br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
106 "<span style='font-size: 0.85em;'>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
107 "Based on model architecture and training setup " |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
108 "(e.g., fine-tuning).<br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
109 "</span>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
110 ) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
111 else: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
112 val_str = ( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
113 "Auto-selected by Ludwig<br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
114 "<span style='font-size: 0.85em;'>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
115 "Automatically tuned based on architecture and dataset.<br>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
116 "See <a href='https://ludwig.ai/latest/configuration/trainer/" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
117 "#trainer-parameters' target='_blank'>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
118 "Ludwig Trainer Parameters</a> for details." |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
119 "</span>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
120 ) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
121 elif key == "epochs": |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
122 if val is None: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
123 val_str = "N/A" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
124 else: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
125 if ( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
126 training_progress |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
127 and "epoch" in training_progress |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
128 and val > training_progress["epoch"] |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
129 ): |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
130 val_str = ( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
131 f"Because of early stopping: the training " |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
132 f"stopped at epoch {training_progress['epoch']}" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
133 ) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
134 else: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
135 val_str = val |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
136 else: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
137 val_str = val if val is not None else "N/A" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
138 if val_str == "N/A" and key not in ["task_type"]: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
139 continue |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
140 rows.append( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
141 f"<tr>" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
142 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
143 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
144 f"{key.replace('_', ' ').title()}</td>" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
145 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
146 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
147 f"{val_str}</td>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
148 f"</tr>" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
149 ) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
150 aug_cfg = config.get("augmentation") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
151 if aug_cfg: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
152 types = [str(a.get("type", "")) for a in aug_cfg] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
153 aug_val = ", ".join(types) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
154 rows.append( |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
155 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
156 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Augmentation</td>" |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
157 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
158 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{aug_val}</td></tr>" |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
159 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
160 if split_info: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
161 rows.append( |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
162 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
163 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Data Split</td>" |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
164 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
165 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{split_info}</td></tr>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
166 ) |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
167 html = f""" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
168 <h2 style="text-align: center;">Model and Training Summary</h2> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
169 <div style="display: flex; justify-content: center;"> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
170 <table style="border-collapse: collapse; width: 100%; table-layout: fixed;"> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
171 <thead><tr> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
172 <th style="padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Parameter</th> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
173 <th style="padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Value</th> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
174 </tr></thead> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
175 <tbody> |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
176 {"".join(rows)} |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
177 </tbody> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
178 </table> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
179 </div><br> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
180 <p style="text-align: center; font-size: 0.9em;"> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
181 Model trained using <a href="https://ludwig.ai/" target="_blank" rel="noopener noreferrer">Ludwig</a>. |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
182 <a href="https://ludwig.ai/latest/configuration/" target="_blank" rel="noopener noreferrer"> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
183 Ludwig documentation provides detailed information about default model and training parameters |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
184 </a> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
185 </p><hr> |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
186 """ |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
187 return html |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
188 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
189 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
190 def detect_output_type(test_stats): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
191 """Detects if the output type is 'binary' or 'category' based on test statistics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
192 label_stats = test_stats.get("label", {}) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
193 if "mean_squared_error" in label_stats: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
194 return "regression" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
195 per_class = label_stats.get("per_class_stats", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
196 if len(per_class) == 2: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
197 return "binary" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
198 return "category" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
199 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
200 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
201 def extract_metrics_from_json( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
202 train_stats: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
203 test_stats: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
204 output_type: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
205 ) -> dict: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
206 """Extracts relevant metrics from training and test statistics based on the output type.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
207 metrics = {"training": {}, "validation": {}, "test": {}} |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
208 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
209 def get_last_value(stats, key): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
210 val = stats.get(key) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
211 if isinstance(val, list) and val: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
212 return val[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
213 elif isinstance(val, (int, float)): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
214 return val |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
215 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
216 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
217 for split in ["training", "validation"]: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
218 split_stats = train_stats.get(split, {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
219 if not split_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
220 logging.warning(f"No statistics found for {split} split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
221 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
222 label_stats = split_stats.get("label", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
223 if not label_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
224 logging.warning(f"No label statistics found for {split} split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
225 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
226 if output_type == "binary": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
227 metrics[split] = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
228 "accuracy": get_last_value(label_stats, "accuracy"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
229 "loss": get_last_value(label_stats, "loss"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
230 "precision": get_last_value(label_stats, "precision"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
231 "recall": get_last_value(label_stats, "recall"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
232 "specificity": get_last_value(label_stats, "specificity"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
233 "roc_auc": get_last_value(label_stats, "roc_auc"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
234 } |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
235 elif output_type == "regression": |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
236 metrics[split] = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
237 "loss": get_last_value(label_stats, "loss"), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
238 "mean_absolute_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
239 label_stats, "mean_absolute_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
240 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
241 "mean_absolute_percentage_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
242 label_stats, "mean_absolute_percentage_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
243 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
244 "mean_squared_error": get_last_value(label_stats, "mean_squared_error"), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
245 "root_mean_squared_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
246 label_stats, "root_mean_squared_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
247 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
248 "root_mean_squared_percentage_error": get_last_value( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
249 label_stats, "root_mean_squared_percentage_error" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
250 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
251 "r2": get_last_value(label_stats, "r2"), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
252 } |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
253 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
254 metrics[split] = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
255 "accuracy": get_last_value(label_stats, "accuracy"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
256 "accuracy_micro": get_last_value(label_stats, "accuracy_micro"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
257 "loss": get_last_value(label_stats, "loss"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
258 "roc_auc": get_last_value(label_stats, "roc_auc"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
259 "hits_at_k": get_last_value(label_stats, "hits_at_k"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
260 } |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
261 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
262 # Test metrics: dynamic extraction according to exclusions |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
263 test_label_stats = test_stats.get("label", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
264 if not test_label_stats: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
265 logging.warning("No label statistics found for test split") |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
266 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
267 combined_stats = test_stats.get("combined", {}) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
268 overall_stats = test_label_stats.get("overall_stats", {}) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
269 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
270 # Define exclusions |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
271 if output_type == "binary": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
272 exclude = {"per_class_stats", "precision_recall_curve", "roc_curve"} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
273 else: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
274 exclude = {"per_class_stats", "confusion_matrix"} |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
275 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
276 # 1. Get all scalar test_label_stats not excluded |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
277 test_metrics = {} |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
278 for k, v in test_label_stats.items(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
279 if k in exclude: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
280 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
281 if k == "overall_stats": |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
282 continue |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
283 if isinstance(v, (int, float, str, bool)): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
284 test_metrics[k] = v |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
285 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
286 # 2. Add overall_stats (flattened) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
287 for k, v in overall_stats.items(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
288 test_metrics[k] = v |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
289 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
290 # 3. Optionally include combined/loss if present and not already |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
291 if "loss" in combined_stats and "loss" not in test_metrics: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
292 test_metrics["loss"] = combined_stats["loss"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
293 metrics["test"] = test_metrics |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
294 return metrics |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
295 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
296 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
297 def generate_table_row(cells, styles): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
298 """Helper function to generate an HTML table row.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
299 return ( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
300 "<tr>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
301 + "".join(f"<td style='{styles}'>{cell}</td>" for cell in cells) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
302 + "</tr>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
303 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
304 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
305 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
306 # ----------------------------------------- |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
307 # 2) MODEL PERFORMANCE (Train/Val/Test) TABLE |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
308 # ----------------------------------------- |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
309 |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
310 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
311 def format_stats_table_html(train_stats: dict, test_stats: dict) -> str: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
312 """Formats a combined HTML table for training, validation, and test metrics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
313 output_type = detect_output_type(test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
314 all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
315 rows = [] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
316 for metric_key in sorted(all_metrics["training"].keys()): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
317 if ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
318 metric_key in all_metrics["validation"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
319 and metric_key in all_metrics["test"] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
320 ): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
321 display_name = METRIC_DISPLAY_NAMES.get( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
322 metric_key, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
323 metric_key.replace("_", " ").title(), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
324 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
325 t = all_metrics["training"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
326 v = all_metrics["validation"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
327 te = all_metrics["test"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
328 if all(x is not None for x in [t, v, te]): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
329 rows.append([display_name, f"{t:.4f}", f"{v:.4f}", f"{te:.4f}"]) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
330 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
331 if not rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
332 return "<table><tr><td>No metric values found.</td></tr></table>" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
333 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
334 html = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
335 "<h2 style='text-align: center;'>Model Performance Summary</h2>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
336 "<div style='display: flex; justify-content: center;'>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
337 "<table class='performance-summary' style='border-collapse: collapse;'>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
338 "<thead><tr>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
339 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
340 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Train</th>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
341 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Validation</th>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
342 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Test</th>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
343 "</tr></thead><tbody>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
344 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
345 for row in rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
346 html += generate_table_row( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
347 row, |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
348 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
349 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
350 html += "</tbody></table></div><br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
351 return html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
352 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
353 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
354 # ------------------------------------------- |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
355 # 3) TRAIN/VALIDATION PERFORMANCE SUMMARY TABLE |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
356 # ------------------------------------------- |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
357 |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
358 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
359 def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
360 """Formats an HTML table for training and validation metrics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
361 output_type = detect_output_type(test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
362 all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
363 rows = [] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
364 for metric_key in sorted(all_metrics["training"].keys()): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
365 if metric_key in all_metrics["validation"]: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
366 display_name = METRIC_DISPLAY_NAMES.get( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
367 metric_key, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
368 metric_key.replace("_", " ").title(), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
369 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
370 t = all_metrics["training"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
371 v = all_metrics["validation"].get(metric_key) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
372 if t is not None and v is not None: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
373 rows.append([display_name, f"{t:.4f}", f"{v:.4f}"]) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
374 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
375 if not rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
376 return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
377 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
378 html = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
379 "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
380 "<div style='display: flex; justify-content: center;'>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
381 "<table class='performance-summary' style='border-collapse: collapse;'>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
382 "<thead><tr>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
383 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
384 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Train</th>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
385 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Validation</th>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
386 "</tr></thead><tbody>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
387 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
388 for row in rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
389 html += generate_table_row( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
390 row, |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
391 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
392 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
393 html += "</tbody></table></div><br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
394 return html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
395 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
396 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
397 # ----------------------------------------- |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
398 # 4) TEST‐ONLY PERFORMANCE SUMMARY TABLE |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
399 # ----------------------------------------- |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
400 |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
401 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
402 def format_test_merged_stats_table_html( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
403 test_metrics: Dict[str, Optional[float]], |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
404 ) -> str: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
405 """Formats an HTML table for test metrics.""" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
406 rows = [] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
407 for key in sorted(test_metrics.keys()): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
408 display_name = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title()) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
409 value = test_metrics[key] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
410 if value is not None: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
411 rows.append([display_name, f"{value:.4f}"]) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
412 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
413 if not rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
414 return "<table><tr><td>No test metric values found.</td></tr></table>" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
415 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
416 html = ( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
417 "<h2 style='text-align: center;'>Test Performance Summary</h2>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
418 "<div style='display: flex; justify-content: center;'>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
419 "<table class='performance-summary' style='border-collapse: collapse;'>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
420 "<thead><tr>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
421 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
422 "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Test</th>" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
423 "</tr></thead><tbody>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
424 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
425 for row in rows: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
426 html += generate_table_row( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
427 row, |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
428 "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
429 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
430 html += "</tbody></table></div><br>" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
431 return html |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
432 |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
433 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
434 def split_data_0_2( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
435 df: pd.DataFrame, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
436 split_column: str, |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
437 validation_size: float = 0.1, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
438 random_state: int = 42, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
439 label_column: Optional[str] = None, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
440 ) -> pd.DataFrame: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
441 """Given a DataFrame whose split_column only contains {0,2}, re-assign a portion of the 0s to become 1s (validation).""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
442 out = df.copy() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
443 out[split_column] = pd.to_numeric(out[split_column], errors="coerce").astype(int) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
444 idx_train = out.index[out[split_column] == 0].tolist() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
445 if not idx_train: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
446 logger.info("No rows with split=0; nothing to do.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
447 return out |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
448 # Always use stratify if possible |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
449 stratify_arr = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
450 if label_column and label_column in out.columns: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
451 label_counts = out.loc[idx_train, label_column].value_counts() |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
452 if label_counts.size > 1: |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
453 # Force stratify even with fewer samples - adjust validation_size if needed |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
454 min_samples_per_class = label_counts.min() |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
455 if min_samples_per_class * validation_size < 1: |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
456 # Adjust validation_size to ensure at least 1 sample per class, but do not exceed original validation_size |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
457 adjusted_validation_size = min( |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
458 validation_size, 1.0 / min_samples_per_class |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
459 ) |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
460 if adjusted_validation_size != validation_size: |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
461 validation_size = adjusted_validation_size |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
462 logger.info( |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
463 f"Adjusted validation_size to {validation_size:.3f} to ensure at least one sample per class in validation" |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
464 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
465 stratify_arr = out.loc[idx_train, label_column] |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
466 logger.info("Using stratified split for validation set") |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
467 else: |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
468 logger.warning("Only one label class found; cannot stratify") |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
469 if validation_size <= 0: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
470 logger.info("validation_size <= 0; keeping all as train.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
471 return out |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
472 if validation_size >= 1: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
473 logger.info("validation_size >= 1; moving all train → validation.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
474 out.loc[idx_train, split_column] = 1 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
475 return out |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
476 # Always try stratified split first |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
477 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
478 train_idx, val_idx = train_test_split( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
479 idx_train, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
480 test_size=validation_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
481 random_state=random_state, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
482 stratify=stratify_arr, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
483 ) |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
484 logger.info("Successfully applied stratified split") |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
485 except ValueError as e: |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
486 logger.warning(f"Stratified split failed ({e}); falling back to random split.") |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
487 train_idx, val_idx = train_test_split( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
488 idx_train, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
489 test_size=validation_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
490 random_state=random_state, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
491 stratify=None, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
492 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
493 out.loc[train_idx, split_column] = 0 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
494 out.loc[val_idx, split_column] = 1 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
495 out[split_column] = out[split_column].astype(int) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
496 return out |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
497 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
498 |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
499 def create_stratified_random_split( |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
500 df: pd.DataFrame, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
501 split_column: str, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
502 split_probabilities: list = [0.7, 0.1, 0.2], |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
503 random_state: int = 42, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
504 label_column: Optional[str] = None, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
505 ) -> pd.DataFrame: |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
506 """Create a stratified random split when no split column exists.""" |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
507 out = df.copy() |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
508 # initialize split column |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
509 out[split_column] = 0 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
510 if not label_column or label_column not in out.columns: |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
511 logger.warning( |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
512 "No label column found; using random split without stratification" |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
513 ) |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
514 # fall back to simple random assignment |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
515 indices = out.index.tolist() |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
516 np.random.seed(random_state) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
517 np.random.shuffle(indices) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
518 n_total = len(indices) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
519 n_train = int(n_total * split_probabilities[0]) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
520 n_val = int(n_total * split_probabilities[1]) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
521 out.loc[indices[:n_train], split_column] = 0 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
522 out.loc[indices[n_train:n_train + n_val], split_column] = 1 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
523 out.loc[indices[n_train + n_val:], split_column] = 2 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
524 return out.astype({split_column: int}) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
525 # check if stratification is possible |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
526 label_counts = out[label_column].value_counts() |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
527 min_samples_per_class = label_counts.min() |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
528 # ensure we have enough samples for stratification: |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
529 # Each class must have at least as many samples as the number of splits, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
530 # so that each split can receive at least one sample per class. |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
531 min_samples_required = len(split_probabilities) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
532 if min_samples_per_class < min_samples_required: |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
533 logger.warning( |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
534 f"Insufficient samples per class for stratification (min: {min_samples_per_class}, required: {min_samples_required}); using random split" |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
535 ) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
536 # fall back to simple random assignment |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
537 indices = out.index.tolist() |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
538 np.random.seed(random_state) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
539 np.random.shuffle(indices) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
540 n_total = len(indices) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
541 n_train = int(n_total * split_probabilities[0]) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
542 n_val = int(n_total * split_probabilities[1]) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
543 out.loc[indices[:n_train], split_column] = 0 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
544 out.loc[indices[n_train:n_train + n_val], split_column] = 1 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
545 out.loc[indices[n_train + n_val:], split_column] = 2 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
546 return out.astype({split_column: int}) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
547 logger.info("Using stratified random split for train/validation/test sets") |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
548 # first split: separate test set |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
549 train_val_idx, test_idx = train_test_split( |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
550 out.index.tolist(), |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
551 test_size=split_probabilities[2], |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
552 random_state=random_state, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
553 stratify=out[label_column], |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
554 ) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
555 # second split: separate training and validation from remaining data |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
556 val_size_adjusted = split_probabilities[1] / ( |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
557 split_probabilities[0] + split_probabilities[1] |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
558 ) |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
559 train_idx, val_idx = train_test_split( |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
560 train_val_idx, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
561 test_size=val_size_adjusted, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
562 random_state=random_state, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
563 stratify=out.loc[train_val_idx, label_column], |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
564 ) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
565 # assign split values |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
566 out.loc[train_idx, split_column] = 0 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
567 out.loc[val_idx, split_column] = 1 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
568 out.loc[test_idx, split_column] = 2 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
569 logger.info("Successfully applied stratified random split") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
570 logger.info( |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
571 f"Split counts: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}" |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
572 ) |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
573 return out.astype({split_column: int}) |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
574 |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
575 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
576 class Backend(Protocol): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
577 """Interface for a machine learning backend.""" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
578 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
579 def prepare_config( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
580 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
581 config_params: Dict[str, Any], |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
582 split_config: Dict[str, Any], |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
583 ) -> str: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
584 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
585 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
586 def run_experiment( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
587 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
588 dataset_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
589 config_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
590 output_dir: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
591 random_seed: int, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
592 ) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
593 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
594 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
595 def generate_plots(self, output_dir: Path) -> None: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
596 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
597 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
598 def generate_html_report( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
599 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
600 title: str, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
601 output_dir: str, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
602 config: Dict[str, Any], |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
603 split_info: str, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
604 ) -> Path: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
605 ... |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
606 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
607 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
608 class LudwigDirectBackend: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
609 """Backend for running Ludwig experiments directly via the internal experiment_cli function.""" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
610 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
611 def prepare_config( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
612 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
613 config_params: Dict[str, Any], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
614 split_config: Dict[str, Any], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
615 ) -> str: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
616 logger.info("LudwigDirectBackend: Preparing YAML configuration.") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
617 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
618 model_name = config_params.get("model_name", "resnet18") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
619 use_pretrained = config_params.get("use_pretrained", False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
620 fine_tune = config_params.get("fine_tune", False) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
621 if use_pretrained: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
622 trainable = bool(fine_tune) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
623 else: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
624 trainable = True |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
625 epochs = config_params.get("epochs", 10) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
626 batch_size = config_params.get("batch_size") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
627 num_processes = config_params.get("preprocessing_num_processes", 1) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
628 early_stop = config_params.get("early_stop", None) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
629 learning_rate = config_params.get("learning_rate") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
630 learning_rate = "auto" if learning_rate is None else float(learning_rate) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
631 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
632 if isinstance(raw_encoder, dict): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
633 encoder_config = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
634 **raw_encoder, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
635 "use_pretrained": use_pretrained, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
636 "trainable": trainable, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
637 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
638 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
639 encoder_config = {"type": raw_encoder} |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
640 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
641 batch_size_cfg = batch_size or "auto" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
642 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
643 label_column_path = config_params.get("label_column_data_path") |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
644 label_series = None |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
645 if label_column_path is not None and Path(label_column_path).exists(): |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
646 try: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
647 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
648 except Exception as e: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
649 logger.warning(f"Could not read label column for task detection: {e}") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
650 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
651 if ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
652 label_series is not None |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
653 and ptypes.is_numeric_dtype(label_series.dtype) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
654 and label_series.nunique() > 10 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
655 ): |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
656 task_type = "regression" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
657 else: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
658 task_type = "classification" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
659 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
660 config_params["task_type"] = task_type |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
661 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
662 image_feat: Dict[str, Any] = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
663 "name": IMAGE_PATH_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
664 "type": "image", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
665 "encoder": encoder_config, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
666 } |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
667 if config_params.get("augmentation") is not None: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
668 image_feat["augmentation"] = config_params["augmentation"] |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
669 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
670 if task_type == "regression": |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
671 output_feat = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
672 "name": LABEL_COLUMN_NAME, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
673 "type": "number", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
674 "decoder": {"type": "regressor"}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
675 "loss": {"type": "mean_squared_error"}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
676 "evaluation": { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
677 "metrics": [ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
678 "mean_squared_error", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
679 "mean_absolute_error", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
680 "r2", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
681 ] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
682 }, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
683 } |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
684 val_metric = config_params.get("validation_metric", "mean_squared_error") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
685 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
686 else: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
687 num_unique_labels = ( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
688 label_series.nunique() if label_series is not None else 2 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
689 ) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
690 output_type = "binary" if num_unique_labels == 2 else "category" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
691 output_feat = {"name": LABEL_COLUMN_NAME, "type": output_type} |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
692 if output_type == "binary" and config_params.get("threshold") is not None: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
693 output_feat["threshold"] = float(config_params["threshold"]) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
694 val_metric = None |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
695 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
696 conf: Dict[str, Any] = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
697 "model_type": "ecd", |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
698 "input_features": [image_feat], |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
699 "output_features": [output_feat], |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
700 "combiner": {"type": "concat"}, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
701 "trainer": { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
702 "epochs": epochs, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
703 "early_stop": early_stop, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
704 "batch_size": batch_size_cfg, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
705 "learning_rate": learning_rate, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
706 # only set validation_metric for regression |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
707 **({"validation_metric": val_metric} if val_metric else {}), |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
708 }, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
709 "preprocessing": { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
710 "split": split_config, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
711 "num_processes": num_processes, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
712 "in_memory": False, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
713 }, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
714 } |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
715 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
716 logger.debug("LudwigDirectBackend: Config dict built.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
717 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
718 yaml_str = yaml.dump(conf, sort_keys=False, indent=2) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
719 logger.info("LudwigDirectBackend: YAML config generated.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
720 return yaml_str |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
721 except Exception: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
722 logger.error( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
723 "LudwigDirectBackend: Failed to serialize YAML.", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
724 exc_info=True, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
725 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
726 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
727 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
728 def run_experiment( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
729 self, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
730 dataset_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
731 config_path: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
732 output_dir: Path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
733 random_seed: int = 42, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
734 ) -> None: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
735 """Invoke Ludwig's internal experiment_cli function to run the experiment.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
736 logger.info("LudwigDirectBackend: Starting experiment execution.") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
737 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
738 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
739 from ludwig.experiment import experiment_cli |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
740 except ImportError as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
741 logger.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
742 "LudwigDirectBackend: Could not import experiment_cli.", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
743 exc_info=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
744 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
745 raise RuntimeError("Ludwig import failed.") from e |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
746 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
747 output_dir.mkdir(parents=True, exist_ok=True) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
748 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
749 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
750 experiment_cli( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
751 dataset=str(dataset_path), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
752 config=str(config_path), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
753 output_directory=str(output_dir), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
754 random_seed=random_seed, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
755 ) |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
756 logger.info( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
757 f"LudwigDirectBackend: Experiment completed. Results in {output_dir}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
758 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
759 except TypeError as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
760 logger.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
761 "LudwigDirectBackend: Argument mismatch in experiment_cli call.", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
762 exc_info=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
763 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
764 raise RuntimeError("Ludwig argument error.") from e |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
765 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
766 logger.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
767 "LudwigDirectBackend: Experiment execution error.", |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
768 exc_info=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
769 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
770 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
771 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
772 def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
773 """Retrieve the learning rate used in the most recent Ludwig run.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
774 output_dir = Path(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
775 exp_dirs = sorted( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
776 output_dir.glob("experiment_run*"), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
777 key=lambda p: p.stat().st_mtime, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
778 ) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
779 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
780 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
781 logger.warning(f"No experiment run directories found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
782 return None |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
783 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
784 progress_file = exp_dirs[-1] / "model" / "training_progress.json" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
785 if not progress_file.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
786 logger.warning(f"No training_progress.json found in {progress_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
787 return None |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
788 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
789 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
790 with progress_file.open("r", encoding="utf-8") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
791 data = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
792 return { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
793 "learning_rate": data.get("learning_rate"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
794 "batch_size": data.get("batch_size"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
795 "epoch": data.get("epoch"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
796 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
797 except Exception as e: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
798 logger.warning(f"Failed to read training progress info: {e}") |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
799 return {} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
800 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
801 def convert_parquet_to_csv(self, output_dir: Path): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
802 """Convert the predictions Parquet file to CSV.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
803 output_dir = Path(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
804 exp_dirs = sorted( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
805 output_dir.glob("experiment_run*"), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
806 key=lambda p: p.stat().st_mtime, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
807 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
808 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
809 logger.warning(f"No experiment run dirs found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
810 return |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
811 exp_dir = exp_dirs[-1] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
812 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
813 csv_path = exp_dir / "predictions.csv" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
814 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
815 df = pd.read_parquet(parquet_path) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
816 df.to_csv(csv_path, index=False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
817 logger.info(f"Converted Parquet to CSV: {csv_path}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
818 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
819 logger.error(f"Error converting Parquet to CSV: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
820 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
821 def generate_plots(self, output_dir: Path) -> None: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
822 """Generate all registered Ludwig visualizations for the latest experiment run.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
823 logger.info("Generating all Ludwig visualizations…") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
824 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
825 test_plots = { |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
826 "compare_performance", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
827 "compare_classifiers_performance_from_prob", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
828 "compare_classifiers_performance_from_pred", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
829 "compare_classifiers_performance_changing_k", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
830 "compare_classifiers_multiclass_multimetric", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
831 "compare_classifiers_predictions", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
832 "confidence_thresholding_2thresholds_2d", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
833 "confidence_thresholding_2thresholds_3d", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
834 "confidence_thresholding", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
835 "confidence_thresholding_data_vs_acc", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
836 "binary_threshold_vs_metric", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
837 "roc_curves", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
838 "roc_curves_from_test_statistics", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
839 "calibration_1_vs_all", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
840 "calibration_multiclass", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
841 "confusion_matrix", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
842 "frequency_vs_f1", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
843 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
844 train_plots = { |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
845 "learning_curves", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
846 "compare_classifiers_performance_subset", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
847 } |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
848 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
849 output_dir = Path(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
850 exp_dirs = sorted( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
851 output_dir.glob("experiment_run*"), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
852 key=lambda p: p.stat().st_mtime, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
853 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
854 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
855 logger.warning(f"No experiment run dirs found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
856 return |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
857 exp_dir = exp_dirs[-1] |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
858 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
859 viz_dir = exp_dir / "visualizations" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
860 viz_dir.mkdir(exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
861 train_viz = viz_dir / "train" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
862 test_viz = viz_dir / "test" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
863 train_viz.mkdir(parents=True, exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
864 test_viz.mkdir(parents=True, exist_ok=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
865 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
866 def _check(p: Path) -> Optional[str]: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
867 return str(p) if p.exists() else None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
868 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
869 training_stats = _check(exp_dir / "training_statistics.json") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
870 test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
871 probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
872 gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
873 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
874 dataset_path = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
875 split_file = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
876 desc = exp_dir / DESCRIPTION_FILE_NAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
877 if desc.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
878 with open(desc, "r") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
879 cfg = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
880 dataset_path = _check(Path(cfg.get("dataset", ""))) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
881 split_file = _check(Path(get_split_path(cfg.get("dataset", "")))) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
882 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
883 output_feature = "" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
884 if desc.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
885 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
886 output_feature = cfg["config"]["output_features"][0]["name"] |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
887 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
888 pass |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
889 if not output_feature and test_stats: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
890 with open(test_stats, "r") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
891 stats = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
892 output_feature = next(iter(stats.keys()), "") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
893 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
894 viz_registry = get_visualizations_registry() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
895 for viz_name, viz_func in viz_registry.items(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
896 if viz_name in train_plots: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
897 viz_dir_plot = train_viz |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
898 elif viz_name in test_plots: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
899 viz_dir_plot = test_viz |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
900 else: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
901 continue |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
902 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
903 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
904 viz_func( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
905 training_statistics=[training_stats] if training_stats else [], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
906 test_statistics=[test_stats] if test_stats else [], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
907 probabilities=[probs_path] if probs_path else [], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
908 output_feature_name=output_feature, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
909 ground_truth_split=2, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
910 top_n_classes=[0], |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
911 top_k=3, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
912 ground_truth_metadata=gt_metadata, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
913 ground_truth=dataset_path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
914 split_file=split_file, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
915 output_directory=str(viz_dir_plot), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
916 normalize=False, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
917 file_format="png", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
918 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
919 logger.info(f"✔ Generated {viz_name}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
920 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
921 logger.warning(f"✘ Skipped {viz_name}: {e}") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
922 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
923 logger.info(f"All visualizations written to {viz_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
924 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
925 def generate_html_report( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
926 self, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
927 title: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
928 output_dir: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
929 config: dict, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
930 split_info: str, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
931 ) -> Path: |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
932 """Assemble an HTML report from visualizations under train_val/ and test/ folders.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
933 cwd = Path.cwd() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
934 report_name = title.lower().replace(" ", "_") + "_report.html" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
935 report_path = cwd / report_name |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
936 output_dir = Path(output_dir) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
937 output_type = None |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
938 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
939 exp_dirs = sorted( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
940 output_dir.glob("experiment_run*"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
941 key=lambda p: p.stat().st_mtime, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
942 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
943 if not exp_dirs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
944 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
945 exp_dir = exp_dirs[-1] |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
946 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
947 base_viz_dir = exp_dir / "visualizations" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
948 train_viz_dir = base_viz_dir / "train" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
949 test_viz_dir = base_viz_dir / "test" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
950 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
951 html = get_html_template() |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
952 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
953 # Extra CSS & JS: center Plotly and enable CSV download for predictions table |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
954 html += """ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
955 <style> |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
956 /* Center Plotly figures (both wrapper and native classes) */ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
957 .plotly-center { display: flex; justify-content: center; } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
958 .plotly-center .plotly-graph-div, .plotly-center .js-plotly-plot { margin: 0 auto !important; } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
959 .js-plotly-plot, .plotly-graph-div { margin-left: auto !important; margin-right: auto !important; } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
960 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
961 /* Download button for predictions table */ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
962 .download-btn { |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
963 padding: 8px 12px; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
964 border: 1px solid #4CAF50; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
965 background: #4CAF50; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
966 color: white; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
967 border-radius: 6px; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
968 cursor: pointer; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
969 } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
970 .download-btn:hover { filter: brightness(0.95); } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
971 .preds-controls { |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
972 display: flex; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
973 justify-content: flex-end; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
974 gap: 8px; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
975 margin: 8px 0; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
976 } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
977 </style> |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
978 <script> |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
979 function tableToCSV(table){ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
980 const rows = Array.from(table.querySelectorAll('tr')); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
981 return rows.map(row => |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
982 Array.from(row.querySelectorAll('th,td')).map(cell => { |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
983 let text = cell.innerText.replace(/\\r?\\n|\\r/g,' ').trim(); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
984 if (text.includes('"') || text.includes(',')) { |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
985 text = '"' + text.replace(/"/g,'""') + '"'; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
986 } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
987 return text; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
988 }).join(',') |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
989 ).join('\\n'); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
990 } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
991 document.addEventListener('DOMContentLoaded', function(){ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
992 const btn = document.getElementById('downloadPredsCsv'); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
993 if(btn){ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
994 btn.addEventListener('click', function(){ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
995 const tbl = document.querySelector('.predictions-table'); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
996 if(!tbl){ alert('Predictions table not found.'); return; } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
997 const csv = tableToCSV(tbl); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
998 const blob = new Blob([csv], {type: 'text/csv;charset=utf-8;'}); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
999 const url = URL.createObjectURL(blob); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1000 const a = document.createElement('a'); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1001 a.href = url; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1002 a.download = 'ground_truth_vs_predictions.csv'; |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1003 document.body.appendChild(a); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1004 a.click(); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1005 document.body.removeChild(a); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1006 URL.revokeObjectURL(url); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1007 }); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1008 } |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1009 }); |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1010 </script> |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1011 """ |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1012 html += f"<h1>{title}</h1>" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1013 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1014 metrics_html = "" |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1015 train_val_metrics_html = "" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1016 test_metrics_html = "" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1017 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1018 train_stats_path = exp_dir / "training_statistics.json" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1019 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1020 if train_stats_path.exists() and test_stats_path.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1021 with open(train_stats_path) as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1022 train_stats = json.load(f) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1023 with open(test_stats_path) as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1024 test_stats = json.load(f) |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1025 output_type = detect_output_type(test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1026 metrics_html = format_stats_table_html(train_stats, test_stats) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1027 train_val_metrics_html = format_train_val_stats_table_html( |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1028 train_stats, test_stats |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1029 ) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1030 test_metrics_html = format_test_merged_stats_table_html( |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1031 extract_metrics_from_json(train_stats, test_stats, output_type)[ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1032 "test" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1033 ] |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1034 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1035 except Exception as e: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1036 logger.warning( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1037 f"Could not load stats for HTML report: {type(e).__name__}: {e}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1038 ) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1039 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1040 config_html = "" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1041 training_progress = self.get_training_process(output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1042 try: |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1043 config_html = format_config_table_html( |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1044 config, split_info, training_progress, output_type |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1045 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1046 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1047 logger.warning(f"Could not load config for HTML report: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1048 |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1049 # ---------- image rendering with exclusions ---------- |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1050 def render_img_section( |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1051 title: str, |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1052 dir_path: Path, |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1053 output_type: str = None, |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1054 exclude_names: Optional[set] = None, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1055 ) -> str: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1056 if not dir_path.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1057 return f"<h2>{title}</h2><p><em>Directory not found.</em></p>" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1058 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1059 exclude_names = exclude_names or set() |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1060 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1061 imgs = list(dir_path.glob("*.png")) |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1062 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1063 default_exclude = {"confusion_matrix.png", "roc_curves.png"} |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1064 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1065 imgs = [ |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1066 img |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1067 for img in imgs |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1068 if img.name not in default_exclude |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1069 and img.name not in exclude_names |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1070 and not img.name.startswith("confusion_matrix__label_top") |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1071 ] |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1072 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1073 if not imgs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1074 return f"<h2>{title}</h2><p><em>No plots found.</em></p>" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1075 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1076 if output_type == "binary": |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1077 order = [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1078 "roc_curves_from_prediction_statistics.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1079 "compare_performance_label.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1080 "confusion_matrix_entropy__label_top2.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1081 ] |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1082 img_names = {img.name: img for img in imgs} |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1083 ordered = [img_names[n] for n in order if n in img_names] |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1084 others = sorted(img for img in imgs if img.name not in order) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1085 imgs = ordered + others |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1086 elif output_type == "category": |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1087 unwanted = { |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1088 "compare_classifiers_multiclass_multimetric__label_best10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1089 "compare_classifiers_multiclass_multimetric__label_top10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1090 "compare_classifiers_multiclass_multimetric__label_worst10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1091 } |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1092 valid_imgs = [img for img in imgs if img.name not in unwanted] |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1093 display_order = [ |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1094 "roc_curves.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1095 "compare_performance_label.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1096 "compare_classifiers_performance_from_prob.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1097 "confusion_matrix_entropy__label_top10.png", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1098 ] |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1099 img_map = {img.name: img for img in valid_imgs} |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1100 ordered = [img_map[n] for n in display_order if n in img_map] |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1101 others = sorted( |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1102 img for img in valid_imgs if img.name not in display_order |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1103 ) |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1104 imgs = ordered + others |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1105 else: |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1106 imgs = sorted(imgs) |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1107 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1108 html_section = "" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1109 for img in imgs: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1110 b64 = encode_image_to_base64(str(img)) |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1111 img_title = img.stem.replace("_", " ").title() |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1112 html_section += ( |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1113 f"<h2 style='text-align: center;'>{img_title}</h2>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1114 f'<div class="plot" style="margin-bottom:20px;text-align:center;">' |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1115 f'<img src="data:image/png;base64,{b64}" ' |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1116 f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />' |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1117 f"</div>" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1118 ) |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1119 return html_section |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1120 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1121 tab1_content = config_html + metrics_html |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1122 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1123 tab2_content = train_val_metrics_html + render_img_section( |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1124 "Training and Validation Visualizations", |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1125 train_viz_dir, |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1126 output_type, |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1127 exclude_names={ |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1128 "compare_classifiers_performance_from_prob.png", |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1129 "roc_curves_from_prediction_statistics.png", |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1130 "precision_recall_curves_from_prediction_statistics.png", |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1131 "precision_recall_curve.png", |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1132 }, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1133 ) |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1134 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1135 # --- Predictions vs Ground Truth table (REGRESSION ONLY) --- |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1136 preds_section = "" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1137 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1138 if output_type == "regression" and parquet_path.exists(): |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1139 try: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1140 # 1) load predictions from Parquet |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1141 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1142 # assume the column containing your model's prediction is named "prediction" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1143 pred_col = next( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1144 (c for c in df_preds.columns if "prediction" in c.lower()), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1145 None, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1146 ) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1147 if pred_col is None: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1148 raise ValueError("No prediction column found in Parquet output") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1149 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"}) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1150 # 2) load ground truth for the test split from prepared CSV |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1151 df_all = pd.read_csv(config["label_column_data_path"]) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1152 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][ |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1153 LABEL_COLUMN_NAME |
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1154 ].reset_index(drop=True) |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1155 # 3) concatenate side-by-side |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1156 df_table = pd.concat([df_gt, df_pred], axis=1) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1157 df_table.columns = [LABEL_COLUMN_NAME, "prediction"] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1158 # 4) render as HTML |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1159 preds_html = df_table.to_html(index=False, classes="predictions-table") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1160 preds_section = ( |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1161 "<h2 style='text-align: center;'>Ground Truth vs. Predictions</h2>" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1162 "<div class='preds-controls'>" |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1163 "<button id='downloadPredsCsv' class='download-btn'>Download CSV</button>" |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1164 "</div>" |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1165 "<div class='scroll-rows-30' style='overflow-x:auto; overflow-y:auto; max-height:900px; margin-bottom:20px;'>" |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1166 + preds_html |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1167 + "</div>" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1168 ) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1169 except Exception as e: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1170 logger.warning(f"Could not build Predictions vs GT table: {e}") |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1171 |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1172 tab3_content = test_metrics_html + preds_section |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1173 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1174 # Classification-only interactive Plotly panels (centered) |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1175 if output_type in ("binary", "category"): |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1176 training_stats_path = exp_dir / "training_statistics.json" |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1177 interactive_plots = build_classification_plots( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1178 str(test_stats_path), |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1179 str(training_stats_path), |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1180 ) |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1181 for plot in interactive_plots: |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1182 tab3_content += ( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1183 f"<h2 style='text-align: center;'>{plot['title']}</h2>" |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1184 f"<div class='plotly-center'>{plot['html']}</div>" |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1185 ) |
10
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1186 |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1187 # Add static TEST PNGs (with default dedupe/exclusions) |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1188 tab3_content += render_img_section( |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1189 "Test Visualizations", test_viz_dir, output_type |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1190 ) |
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
goeckslab
parents:
9
diff
changeset
|
1191 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1192 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1193 modal_html = get_metrics_help_modal() |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1194 html += tabbed_html + modal_html + get_html_closing() |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1195 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1196 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1197 with open(report_path, "w") as f: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1198 f.write(html) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1199 logger.info(f"HTML report generated at: {report_path}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1200 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1201 logger.error(f"Failed to write HTML report: {e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1202 raise |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1203 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1204 return report_path |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1205 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1206 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1207 class WorkflowOrchestrator: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1208 """Manages the image-classification workflow.""" |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1209 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1210 def __init__(self, args: argparse.Namespace, backend: Backend): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1211 self.args = args |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1212 self.backend = backend |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1213 self.temp_dir: Optional[Path] = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1214 self.image_extract_dir: Optional[Path] = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1215 logger.info(f"Orchestrator initialized with backend: {type(backend).__name__}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1216 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1217 def _create_temp_dirs(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1218 """Create temporary output and image extraction directories.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1219 try: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1220 self.temp_dir = Path( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1221 tempfile.mkdtemp(dir=self.args.output_dir, prefix=TEMP_DIR_PREFIX) |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1222 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1223 self.image_extract_dir = self.temp_dir / "images" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1224 self.image_extract_dir.mkdir() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1225 logger.info(f"Created temp directory: {self.temp_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1226 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1227 logger.error("Failed to create temporary directories", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1228 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1229 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1230 def _extract_images(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1231 """Extract images from ZIP into the temp image directory.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1232 if self.image_extract_dir is None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1233 raise RuntimeError("Temp image directory not initialized.") |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1234 logger.info( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1235 f"Extracting images from {self.args.image_zip} → {self.image_extract_dir}" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1236 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1237 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1238 with zipfile.ZipFile(self.args.image_zip, "r") as z: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1239 z.extractall(self.image_extract_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1240 logger.info("Image extraction complete.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1241 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1242 logger.error("Error extracting zip file", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1243 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1244 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1245 def _prepare_data(self) -> Tuple[Path, Dict[str, Any], str]: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1246 """Load CSV, update image paths, handle splits, and write prepared CSV.""" |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1247 if not self.temp_dir or not self.image_extract_dir: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1248 raise RuntimeError("Temp dirs not initialized before data prep.") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1249 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1250 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1251 df = pd.read_csv(self.args.csv_file) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1252 logger.info(f"Loaded CSV: {self.args.csv_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1253 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1254 logger.error("Error loading CSV file", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1255 raise |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1256 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1257 required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME} |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1258 missing = required - set(df.columns) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1259 if missing: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1260 raise ValueError(f"Missing CSV columns: {', '.join(missing)}") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1261 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1262 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1263 df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1264 lambda p: str((self.image_extract_dir / p).resolve()) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1265 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1266 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1267 logger.error("Error updating image paths", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1268 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1269 if SPLIT_COLUMN_NAME in df.columns: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1270 df, split_config, split_info = self._process_fixed_split(df) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1271 else: |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1272 logger.info("No split column; creating stratified random split") |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1273 df = create_stratified_random_split( |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1274 df=df, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1275 split_column=SPLIT_COLUMN_NAME, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1276 split_probabilities=self.args.split_probabilities, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1277 random_state=self.args.random_seed, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1278 label_column=LABEL_COLUMN_NAME, |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1279 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1280 split_config = { |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1281 "type": "fixed", |
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1282 "column": SPLIT_COLUMN_NAME, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1283 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1284 split_info = ( |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1285 f"No split column in CSV. Created stratified random split: " |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1286 f"{[int(p * 100) for p in self.args.split_probabilities]}% " |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1287 f"for train/val/test with balanced label distribution." |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1288 ) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1289 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1290 final_csv = self.temp_dir / TEMP_CSV_FILENAME |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1291 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1292 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1293 df.to_csv(final_csv, index=False) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1294 logger.info(f"Saved prepared data to {final_csv}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1295 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1296 logger.error("Error saving prepared CSV", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1297 raise |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1298 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1299 return final_csv, split_config, split_info |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1300 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1301 def _process_fixed_split( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1302 self, df: pd.DataFrame |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1303 ) -> Tuple[pd.DataFrame, Dict[str, Any], str]: |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1304 """Process a fixed split column (0=train,1=val,2=test).""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1305 logger.info(f"Fixed split column '{SPLIT_COLUMN_NAME}' detected.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1306 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1307 col = df[SPLIT_COLUMN_NAME] |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1308 df[SPLIT_COLUMN_NAME] = pd.to_numeric(col, errors="coerce").astype( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1309 pd.Int64Dtype() |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1310 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1311 if df[SPLIT_COLUMN_NAME].isna().any(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1312 logger.warning("Split column contains non-numeric/missing values.") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1313 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1314 unique = set(df[SPLIT_COLUMN_NAME].dropna().unique()) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1315 logger.info(f"Unique split values: {unique}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1316 if unique == {0, 2}: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1317 df = split_data_0_2( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1318 df, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1319 SPLIT_COLUMN_NAME, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1320 validation_size=self.args.validation_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1321 label_column=LABEL_COLUMN_NAME, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1322 random_state=self.args.random_seed, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1323 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1324 split_info = ( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1325 "Detected a split column (with values 0 and 2) in the input CSV. " |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1326 f"Used this column as a base and reassigned " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1327 f"{self.args.validation_size * 100:.1f}% " |
7
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
goeckslab
parents:
6
diff
changeset
|
1328 "of the training set (originally labeled 0) to validation (labeled 1) using stratified sampling." |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1329 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1330 logger.info("Applied custom 0/2 split.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1331 elif unique.issubset({0, 1, 2}): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1332 split_info = "Used user-defined split column from CSV." |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1333 logger.info("Using fixed split as-is.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1334 else: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1335 raise ValueError(f"Unexpected split values: {unique}") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1336 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1337 return df, {"type": "fixed", "column": SPLIT_COLUMN_NAME}, split_info |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1338 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1339 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1340 logger.error("Error processing fixed split", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1341 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1342 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1343 def _cleanup_temp_dirs(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1344 if self.temp_dir and self.temp_dir.exists(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1345 logger.info(f"Cleaning up temp directory: {self.temp_dir}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1346 shutil.rmtree(self.temp_dir, ignore_errors=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1347 self.temp_dir = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1348 self.image_extract_dir = None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1349 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1350 def run(self) -> None: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1351 """Execute the full workflow end-to-end.""" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1352 logger.info("Starting workflow...") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1353 self.args.output_dir.mkdir(parents=True, exist_ok=True) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1354 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1355 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1356 self._create_temp_dirs() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1357 self._extract_images() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1358 csv_path, split_cfg, split_info = self._prepare_data() |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1359 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1360 use_pretrained = self.args.use_pretrained or self.args.fine_tune |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1361 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1362 backend_args = { |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1363 "model_name": self.args.model_name, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1364 "fine_tune": self.args.fine_tune, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1365 "use_pretrained": use_pretrained, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1366 "epochs": self.args.epochs, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1367 "batch_size": self.args.batch_size, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1368 "preprocessing_num_processes": self.args.preprocessing_num_processes, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1369 "split_probabilities": self.args.split_probabilities, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1370 "learning_rate": self.args.learning_rate, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1371 "random_seed": self.args.random_seed, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1372 "early_stop": self.args.early_stop, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1373 "label_column_data_path": csv_path, |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1374 "augmentation": self.args.augmentation, |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1375 "threshold": self.args.threshold, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1376 } |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1377 yaml_str = self.backend.prepare_config(backend_args, split_cfg) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1378 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1379 config_file = self.temp_dir / TEMP_CONFIG_FILENAME |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1380 config_file.write_text(yaml_str) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1381 logger.info(f"Wrote backend config: {config_file}") |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1382 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1383 self.backend.run_experiment( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1384 csv_path, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1385 config_file, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1386 self.args.output_dir, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1387 self.args.random_seed, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1388 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1389 logger.info("Workflow completed successfully.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1390 self.backend.generate_plots(self.args.output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1391 report_file = self.backend.generate_html_report( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1392 "Image Classification Results", |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1393 self.args.output_dir, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1394 backend_args, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1395 split_info, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1396 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1397 logger.info(f"HTML report generated at: {report_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1398 self.backend.convert_parquet_to_csv(self.args.output_dir) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1399 logger.info("Converted Parquet to CSV.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1400 except Exception: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1401 logger.error("Workflow execution failed", exc_info=True) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1402 raise |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1403 finally: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1404 self._cleanup_temp_dirs() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1405 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1406 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1407 def parse_learning_rate(s): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1408 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1409 return float(s) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1410 except (TypeError, ValueError): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1411 return None |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1412 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1413 |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1414 def aug_parse(aug_string: str): |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1415 """ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1416 Parse comma-separated augmentation keys into Ludwig augmentation dicts. |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1417 Raises ValueError on unknown key. |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1418 """ |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1419 mapping = { |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1420 "random_horizontal_flip": {"type": "random_horizontal_flip"}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1421 "random_vertical_flip": {"type": "random_vertical_flip"}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1422 "random_rotate": {"type": "random_rotate", "degree": 10}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1423 "random_blur": {"type": "random_blur", "kernel_size": 3}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1424 "random_brightness": {"type": "random_brightness", "min": 0.5, "max": 2.0}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1425 "random_contrast": {"type": "random_contrast", "min": 0.5, "max": 2.0}, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1426 } |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1427 aug_list = [] |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1428 for tok in aug_string.split(","): |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1429 key = tok.strip() |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1430 if key not in mapping: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1431 valid = ", ".join(mapping.keys()) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1432 raise ValueError(f"Unknown augmentation '{key}'. Valid choices: {valid}") |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1433 aug_list.append(mapping[key]) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1434 return aug_list |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1435 |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1436 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1437 class SplitProbAction(argparse.Action): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1438 def __call__(self, parser, namespace, values, option_string=None): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1439 train, val, test = values |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1440 total = train + val + test |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1441 if abs(total - 1.0) > 1e-6: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1442 parser.error( |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1443 f"--split-probabilities must sum to 1.0; " |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1444 f"got {train:.3f} + {val:.3f} + {test:.3f} = {total:.3f}" |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1445 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1446 setattr(namespace, self.dest, values) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1447 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1448 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1449 def main(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1450 parser = argparse.ArgumentParser( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1451 description="Image Classification Learner with Pluggable Backends", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1452 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1453 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1454 "--csv-file", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1455 required=True, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1456 type=Path, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1457 help="Path to the input CSV", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1458 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1459 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1460 "--image-zip", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1461 required=True, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1462 type=Path, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1463 help="Path to the images ZIP", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1464 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1465 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1466 "--model-name", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1467 required=True, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1468 choices=MODEL_ENCODER_TEMPLATES.keys(), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1469 help="Which model template to use", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1470 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1471 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1472 "--use-pretrained", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1473 action="store_true", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1474 help="Use pretrained weights for the model", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1475 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1476 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1477 "--fine-tune", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1478 action="store_true", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1479 help="Enable fine-tuning", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1480 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1481 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1482 "--epochs", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1483 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1484 default=10, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1485 help="Number of training epochs", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1486 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1487 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1488 "--early-stop", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1489 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1490 default=5, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1491 help="Early stopping patience", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1492 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1493 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1494 "--batch-size", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1495 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1496 help="Batch size (None = auto)", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1497 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1498 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1499 "--output-dir", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1500 type=Path, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1501 default=Path("learner_output"), |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1502 help="Where to write outputs", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1503 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1504 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1505 "--validation-size", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1506 type=float, |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1507 default=0.15, |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1508 help="Fraction for validation (0.0–1.0)", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1509 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1510 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1511 "--preprocessing-num-processes", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1512 type=int, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1513 default=max(1, os.cpu_count() // 2), |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1514 help="CPU processes for data prep", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1515 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1516 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1517 "--split-probabilities", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1518 type=float, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1519 nargs=3, |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1520 metavar=("train", "val", "test"), |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1521 action=SplitProbAction, |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1522 default=[0.7, 0.1, 0.2], |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1523 help=( |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1524 "Random split proportions (e.g., 0.7 0.1 0.2).Only used if no split column." |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1525 ), |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1526 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1527 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1528 "--random-seed", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1529 type=int, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1530 default=42, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1531 help="Random seed used for dataset splitting (default: 42)", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1532 ) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1533 parser.add_argument( |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1534 "--learning-rate", |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1535 type=parse_learning_rate, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1536 default=None, |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1537 help="Learning rate. If not provided, Ludwig will auto-select it.", |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1538 ) |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1539 parser.add_argument( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1540 "--augmentation", |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1541 type=str, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1542 default=None, |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1543 help=( |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1544 "Comma-separated list (in order) of any of: " |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1545 "random_horizontal_flip, random_vertical_flip, random_rotate, " |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1546 "random_blur, random_brightness, random_contrast. " |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1547 "E.g. --augmentation random_horizontal_flip,random_rotate" |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1548 ), |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1549 ) |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1550 parser.add_argument( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1551 "--threshold", |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1552 type=float, |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1553 default=None, |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1554 help=( |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1555 "Decision threshold for binary classification (0.0–1.0)." |
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1556 "Overrides default 0.5." |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1557 ), |
8
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
goeckslab
parents:
7
diff
changeset
|
1558 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1559 args = parser.parse_args() |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1560 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1561 if not 0.0 <= args.validation_size <= 1.0: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1562 parser.error("validation-size must be between 0.0 and 1.0") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1563 if not args.csv_file.is_file(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1564 parser.error(f"CSV not found: {args.csv_file}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1565 if not args.image_zip.is_file(): |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1566 parser.error(f"ZIP not found: {args.image_zip}") |
2
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1567 if args.augmentation is not None: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1568 try: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1569 augmentation_setup = aug_parse(args.augmentation) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1570 setattr(args, "augmentation", augmentation_setup) |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1571 except ValueError as e: |
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
goeckslab
parents:
1
diff
changeset
|
1572 parser.error(str(e)) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1573 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1574 backend_instance = LudwigDirectBackend() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1575 orchestrator = WorkflowOrchestrator(args, backend_instance) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1576 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1577 exit_code = 0 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1578 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1579 orchestrator.run() |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1580 logger.info("Main script finished successfully.") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1581 except Exception as e: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1582 logger.error(f"Main script failed.{e}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1583 exit_code = 1 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1584 finally: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1585 sys.exit(exit_code) |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1586 |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1587 |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1588 if __name__ == "__main__": |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1589 try: |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1590 import ludwig |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1591 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1592 logger.debug(f"Found Ludwig version: {ludwig.globals.LUDWIG_VERSION}") |
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1593 except ImportError: |
1
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1594 logger.error( |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1595 "Ludwig library not found. Please ensure Ludwig is installed " |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1596 "('pip install ludwig[image]')" |
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
1597 ) |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1598 sys.exit(1) |
9
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
goeckslab
parents:
8
diff
changeset
|
1599 |
0
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
goeckslab
parents:
diff
changeset
|
1600 main() |