annotate ludwig_backend.py @ 16:8729f69e9207 draft default tip

planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
author goeckslab
date Wed, 03 Dec 2025 01:28:52 +0000
parents d17e3a1b8659
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1 import json
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
2 import logging
16
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
3 import os
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
4 from pathlib import Path
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
5 from typing import Any, Dict, Optional, Protocol, Tuple
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
6
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
7 import pandas as pd
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
8 import pandas.api.types as ptypes
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
9 import yaml
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
10 from constants import (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
11 IMAGE_PATH_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
12 LABEL_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
13 MODEL_ENCODER_TEMPLATES,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
14 SPLIT_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
15 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
16 from html_structure import (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
17 build_tabbed_html,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
18 encode_image_to_base64,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
19 format_config_table_html,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
20 format_stats_table_html,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
21 format_test_merged_stats_table_html,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
22 format_train_val_stats_table_html,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
23 get_html_closing,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
24 get_html_template,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
25 get_metrics_help_modal,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
26 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
27 from ludwig.globals import (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
28 DESCRIPTION_FILE_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
29 PREDICTIONS_PARQUET_FILE_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
30 TEST_STATISTICS_FILE_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
31 TRAIN_SET_METADATA_FILE_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
32 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
33 from ludwig.utils.data_utils import get_split_path
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
34 from metaformer_setup import get_visualizations_registry, META_DEFAULT_CFGS
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
35 from plotly_plots import (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
36 build_classification_plots,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
37 build_prediction_diagnostics,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
38 build_regression_test_plots,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
39 build_regression_train_val_plots,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
40 build_train_validation_plots,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
41 )
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
42 from utils import detect_output_type, extract_metrics_from_json
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
43
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
44 logger = logging.getLogger("ImageLearner")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
45
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
46
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
47 class Backend(Protocol):
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
48 """Interface for a machine learning backend."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
49
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
50 def prepare_config(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
51 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
52 config_params: Dict[str, Any],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
53 split_config: Dict[str, Any],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
54 ) -> str:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
55 ...
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
56
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
57 def run_experiment(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
58 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
59 dataset_path: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
60 config_path: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
61 output_dir: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
62 random_seed: int,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
63 ) -> None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
64 ...
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
65
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
66 def generate_plots(self, output_dir: Path) -> None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
67 ...
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
68
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
69 def generate_html_report(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
70 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
71 title: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
72 output_dir: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
73 config: Dict[str, Any],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
74 split_info: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
75 ) -> Path:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
76 ...
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
77
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
78
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
79 class LudwigDirectBackend:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
80 """Backend for running Ludwig experiments directly via the internal experiment_cli function."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
81
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
82 _torchvision_patched = False
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
83
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
84 def _detect_image_dimensions(self, image_zip_path: str) -> Tuple[int, int]:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
85 """Detect image dimensions from the first image in the dataset."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
86 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
87 import zipfile
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
88 from PIL import Image
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
89 import io
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
90
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
91 # Check if image_zip is provided
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
92 if not image_zip_path:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
93 logger.warning("No image zip provided, using default 224x224")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
94 return 224, 224
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
95
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
96 # Extract first image to detect dimensions
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
97 with zipfile.ZipFile(image_zip_path, 'r') as z:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
98 image_files = [f for f in z.namelist() if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
99 if not image_files:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
100 logger.warning("No image files found in zip, using default 224x224")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
101 return 224, 224
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
102
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
103 # Check first image
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
104 with z.open(image_files[0]) as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
105 img = Image.open(io.BytesIO(f.read()))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
106 width, height = img.size
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
107 logger.info(f"Detected image dimensions: {width}x{height}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
108 return height, width # Return as (height, width) to match encoder config
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
109
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
110 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
111 logger.warning(f"Error detecting image dimensions: {e}, using default 224x224")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
112 return 224, 224
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
113
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
114 def prepare_config(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
115 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
116 config_params: Dict[str, Any],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
117 split_config: Dict[str, Any],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
118 ) -> str:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
119 logger.info("LudwigDirectBackend: Preparing YAML configuration.")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
120
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
121 model_name = config_params.get("model_name", "resnet18")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
122 use_pretrained = config_params.get("use_pretrained", False)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
123 fine_tune = config_params.get("fine_tune", False)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
124 if use_pretrained:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
125 trainable = bool(fine_tune)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
126 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
127 trainable = True
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
128 epochs = config_params.get("epochs", 10)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
129 batch_size = config_params.get("batch_size")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
130 num_processes = config_params.get("preprocessing_num_processes", 1)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
131 early_stop = config_params.get("early_stop", None)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
132 learning_rate = config_params.get("learning_rate")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
133 learning_rate = "auto" if learning_rate is None else float(learning_rate)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
134 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
135
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
136 # --- MetaFormer detection and config logic ---
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
137 def _is_metaformer(name: str) -> bool:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
138 return isinstance(name, str) and name.startswith(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
139 (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
140 "identityformer_",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
141 "randformer_",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
142 "poolformerv2_",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
143 "convformer_",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
144 "caformer_",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
145 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
146 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
147
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
148 # Check if this is a MetaFormer model (either direct name or in custom_model)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
149 is_metaformer = (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
150 _is_metaformer(model_name)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
151 or (isinstance(raw_encoder, dict) and "custom_model" in raw_encoder and _is_metaformer(raw_encoder["custom_model"]))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
152 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
153
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
154 metaformer_resize: Optional[Tuple[int, int]] = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
155 metaformer_channels = 3
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
156
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
157 if is_metaformer:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
158 # Handle MetaFormer models
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
159 custom_model = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
160 if isinstance(raw_encoder, dict) and "custom_model" in raw_encoder:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
161 custom_model = raw_encoder["custom_model"]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
162 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
163 custom_model = model_name
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
164
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
165 logger.info(f"DETECTED MetaFormer model: {custom_model}")
16
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
166 # Stash the model name for patched Stacked2DCNN in case Ludwig drops custom_model from kwargs
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
167 try:
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
168 from MetaFormer.metaformer_stacked_cnn import set_current_metaformer_model
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
169
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
170 set_current_metaformer_model(custom_model)
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
171 except Exception:
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
172 logger.debug("Could not set current MetaFormer model hint; proceeding without global override")
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
173 # Also pass via environment to survive process boundaries (e.g., Ray workers)
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
174 os.environ["GLEAM_META_FORMER_MODEL"] = custom_model
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
175 cfg_channels, cfg_height, cfg_width = 3, 224, 224
16
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
176 model_cfg = {}
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
177 if META_DEFAULT_CFGS:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
178 model_cfg = META_DEFAULT_CFGS.get(custom_model, {})
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
179 input_size = model_cfg.get("input_size")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
180 if isinstance(input_size, (list, tuple)) and len(input_size) == 3:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
181 cfg_channels, cfg_height, cfg_width = (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
182 int(input_size[0]),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
183 int(input_size[1]),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
184 int(input_size[2]),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
185 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
186
16
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
187 weights_url = None
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
188 if isinstance(model_cfg, dict):
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
189 weights_url = model_cfg.get("url")
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
190 logger.info(
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
191 "MetaFormer cfg lookup: model=%s has_cfg=%s url=%s use_pretrained=%s",
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
192 custom_model,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
193 bool(model_cfg),
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
194 weights_url,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
195 use_pretrained,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
196 )
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
197 if use_pretrained and not weights_url:
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
198 logger.warning(
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
199 "MetaFormer pretrained requested for %s but no URL found in default cfgs; model will be randomly initialized",
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
200 custom_model,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
201 )
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
202
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
203 resize_value = config_params.get("image_resize")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
204 if resize_value and resize_value != "original":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
205 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
206 dimensions = resize_value.split("x")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
207 if len(dimensions) == 2:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
208 target_height, target_width = int(dimensions[0]), int(dimensions[1])
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
209 if target_height <= 0 or target_width <= 0:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
210 raise ValueError(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
211 f"Image resize must be positive integers, received {resize_value}."
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
212 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
213 logger.info(f"MetaFormer explicit resize: {target_height}x{target_width}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
214 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
215 raise ValueError(resize_value)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
216 except (ValueError, IndexError):
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
217 logger.warning(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
218 "Invalid image resize format '%s'; falling back to model default %sx%s",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
219 resize_value,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
220 cfg_height,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
221 cfg_width,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
222 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
223 target_height, target_width = cfg_height, cfg_width
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
224 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
225 image_zip_path = config_params.get("image_zip", "")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
226 detected_height, detected_width = self._detect_image_dimensions(image_zip_path)
16
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
227 target_height, target_width = detected_height, detected_width
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
228 if use_pretrained and (detected_height, detected_width) != (cfg_height, cfg_width):
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
229 logger.info(
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
230 "MetaFormer pretrained weights expect %sx%s; proceeding with detected %sx%s",
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
231 cfg_height,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
232 cfg_width,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
233 detected_height,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
234 detected_width,
8729f69e9207 planemo upload for repository https://github.com/goeckslab/gleam.git commit bb4bcdc888d73bbfd85d78ce8999a1080fe813ff
goeckslab
parents: 15
diff changeset
235 )
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
236 if target_height <= 0 or target_width <= 0:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
237 raise ValueError(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
238 f"Invalid detected image dimensions for MetaFormer: {target_height}x{target_width}."
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
239 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
240
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
241 metaformer_channels = cfg_channels
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
242 metaformer_resize = (target_height, target_width)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
243
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
244 encoder_config = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
245 "type": "stacked_cnn",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
246 "height": target_height,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
247 "width": target_width,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
248 "num_channels": metaformer_channels,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
249 "output_size": 128,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
250 "use_pretrained": use_pretrained,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
251 "trainable": trainable,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
252 "custom_model": custom_model,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
253 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
254
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
255 elif isinstance(raw_encoder, dict):
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
256 # Handle image resize for regular encoders
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
257 # Note: Standard encoders like ResNet don't support height/width parameters
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
258 # Resize will be handled at the preprocessing level by Ludwig
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
259 if config_params.get("image_resize") and config_params["image_resize"] != "original":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
260 logger.info(f"Resize requested: {config_params['image_resize']} for standard encoder. Resize will be handled at preprocessing level.")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
261
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
262 encoder_config = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
263 **raw_encoder,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
264 "use_pretrained": use_pretrained,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
265 "trainable": trainable,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
266 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
267 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
268 encoder_config = {"type": raw_encoder}
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
269
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
270 batch_size_cfg = batch_size or "auto"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
271
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
272 label_column_path = config_params.get("label_column_data_path")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
273 label_series = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
274 label_metadata_hint = config_params.get("label_metadata") or {}
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
275 output_type_hint = config_params.get("output_type_hint")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
276 num_unique_labels = int(label_metadata_hint.get("num_unique", 2))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
277 numeric_binary_labels = bool(label_metadata_hint.get("is_numeric_binary", False))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
278 likely_regression = bool(label_metadata_hint.get("likely_regression", False))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
279 if label_column_path is not None and Path(label_column_path).exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
280 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
281 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
282 non_na = label_series.dropna()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
283 if not non_na.empty:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
284 num_unique_labels = non_na.nunique()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
285 is_numeric = ptypes.is_numeric_dtype(label_series.dtype)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
286 numeric_binary_labels = is_numeric and num_unique_labels == 2
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
287 likely_regression = (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
288 is_numeric and not numeric_binary_labels and num_unique_labels > 10
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
289 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
290 if numeric_binary_labels:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
291 logger.info(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
292 "Detected numeric binary labels in '%s'; configuring Ludwig for binary classification.",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
293 LABEL_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
294 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
295 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
296 logger.warning(f"Could not read label column for task detection: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
297
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
298 if output_type_hint == "binary":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
299 num_unique_labels = 2
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
300 numeric_binary_labels = numeric_binary_labels or bool(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
301 label_metadata_hint.get("is_numeric", False)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
302 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
303
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
304 if numeric_binary_labels:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
305 task_type = "classification"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
306 elif likely_regression:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
307 task_type = "regression"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
308 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
309 task_type = "classification"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
310
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
311 if task_type == "regression" and numeric_binary_labels:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
312 logger.warning(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
313 "Numeric binary labels detected but regression task chosen; forcing classification to avoid invalid Ludwig config."
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
314 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
315 task_type = "classification"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
316
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
317 config_params["task_type"] = task_type
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
318
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
319 image_feat: Dict[str, Any] = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
320 "name": IMAGE_PATH_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
321 "type": "image",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
322 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
323 # Set preprocessing dimensions FIRST for MetaFormer models
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
324 if is_metaformer:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
325 if metaformer_resize is None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
326 metaformer_resize = (224, 224)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
327 height, width = metaformer_resize
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
328
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
329 # CRITICAL: Set preprocessing dimensions FIRST for MetaFormer models
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
330 # This is essential for MetaFormer models to work properly
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
331 if "preprocessing" not in image_feat:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
332 image_feat["preprocessing"] = {}
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
333 image_feat["preprocessing"]["height"] = height
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
334 image_feat["preprocessing"]["width"] = width
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
335 # Use infer_image_dimensions=True to allow Ludwig to read images for validation
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
336 # but set explicit max dimensions to control the output size
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
337 image_feat["preprocessing"]["infer_image_dimensions"] = True
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
338 image_feat["preprocessing"]["infer_image_max_height"] = height
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
339 image_feat["preprocessing"]["infer_image_max_width"] = width
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
340 image_feat["preprocessing"]["num_channels"] = metaformer_channels
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
341 image_feat["preprocessing"]["resize_method"] = "interpolate" # Use interpolation for better quality
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
342 image_feat["preprocessing"]["standardize_image"] = "imagenet1k" # Use ImageNet standardization
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
343 # Force Ludwig to respect our dimensions by setting additional parameters
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
344 image_feat["preprocessing"]["requires_equal_dimensions"] = False
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
345 logger.info(f"Set preprocessing dimensions for MetaFormer: {height}x{width} (infer_dimensions=True with max dimensions to allow validation)")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
346 # Now set the encoder configuration
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
347 image_feat["encoder"] = encoder_config
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
348
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
349 if config_params.get("augmentation") is not None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
350 image_feat["augmentation"] = config_params["augmentation"]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
351
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
352 # Add resize configuration for standard encoders (ResNet, etc.)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
353 # FIXED: MetaFormer models now respect user dimensions completely
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
354 # Previously there was a double resize issue where MetaFormer would force 224x224
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
355 # Now both MetaFormer and standard encoders respect user's resize choice
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
356 if (not is_metaformer) and config_params.get("image_resize") and config_params["image_resize"] != "original":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
357 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
358 dimensions = config_params["image_resize"].split("x")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
359 if len(dimensions) == 2:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
360 height, width = int(dimensions[0]), int(dimensions[1])
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
361 if height <= 0 or width <= 0:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
362 raise ValueError(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
363 f"Image resize must be positive integers, received {config_params['image_resize']}."
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
364 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
365
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
366 # Add resize to preprocessing for standard encoders
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
367 if "preprocessing" not in image_feat:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
368 image_feat["preprocessing"] = {}
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
369 image_feat["preprocessing"]["height"] = height
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
370 image_feat["preprocessing"]["width"] = width
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
371 # Use infer_image_dimensions=True to allow Ludwig to read images for validation
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
372 # but set explicit max dimensions to control the output size
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
373 image_feat["preprocessing"]["infer_image_dimensions"] = True
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
374 image_feat["preprocessing"]["infer_image_max_height"] = height
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
375 image_feat["preprocessing"]["infer_image_max_width"] = width
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
376 logger.info(f"Added resize preprocessing: {height}x{width} for standard encoder with infer_image_dimensions=True and max dimensions")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
377 except (ValueError, IndexError):
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
378 logger.warning(f"Invalid image resize format: {config_params['image_resize']}, skipping resize preprocessing")
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
379
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
380 def _resolve_validation_metric(task: str, requested: Optional[str]) -> Optional[str]:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
381 """Pick a validation metric that Ludwig will accept for the resolved task."""
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
382 default_map = {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
383 "regression": "pearson_r",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
384 "binary": "roc_auc",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
385 "category": "accuracy",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
386 }
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
387 allowed_map = {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
388 "regression": {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
389 "pearson_r",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
390 "mean_absolute_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
391 "mean_squared_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
392 "root_mean_squared_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
393 "mean_absolute_percentage_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
394 "r2",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
395 "explained_variance",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
396 "loss",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
397 },
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
398 # Ludwig rejects f1 and balanced_accuracy for binary outputs; keep to known-safe set.
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
399 "binary": {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
400 "roc_auc",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
401 "accuracy",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
402 "precision",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
403 "recall",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
404 "specificity",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
405 "log_loss",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
406 "loss",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
407 },
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
408 "category": {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
409 "accuracy",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
410 "balanced_accuracy",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
411 "precision",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
412 "recall",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
413 "f1",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
414 "specificity",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
415 "log_loss",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
416 "loss",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
417 },
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
418 }
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
419 alias_map = {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
420 "regression": {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
421 "mae": "mean_absolute_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
422 "mse": "mean_squared_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
423 "rmse": "root_mean_squared_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
424 "mape": "mean_absolute_percentage_error",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
425 },
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
426 }
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
427
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
428 default_metric = default_map.get(task)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
429 allowed = allowed_map.get(task, set())
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
430 metric = requested or default_metric
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
431
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
432 if metric is None:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
433 return None
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
434
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
435 metric = alias_map.get(task, {}).get(metric, metric)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
436
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
437 if metric not in allowed:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
438 if requested:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
439 logger.warning(
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
440 f"Validation metric '{requested}' is not supported for {task} outputs; using '{default_metric}' instead."
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
441 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
442 metric = default_metric
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
443 return metric
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
444
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
445 if task_type == "regression":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
446 output_feat = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
447 "name": LABEL_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
448 "type": "number",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
449 "decoder": {"type": "regressor"},
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
450 "loss": {"type": "mean_squared_error"},
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
451 }
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
452 val_metric = _resolve_validation_metric("regression", config_params.get("validation_metric"))
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
453
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
454 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
455 if num_unique_labels == 2:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
456 output_feat = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
457 "name": LABEL_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
458 "type": "binary",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
459 "loss": {"type": "binary_weighted_cross_entropy"},
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
460 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
461 if config_params.get("threshold") is not None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
462 output_feat["threshold"] = float(config_params["threshold"])
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
463 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
464 output_feat = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
465 "name": LABEL_COLUMN_NAME,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
466 "type": "category",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
467 "loss": {"type": "softmax_cross_entropy"},
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
468 }
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
469 val_metric = _resolve_validation_metric(
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
470 "binary" if num_unique_labels == 2 else "category",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
471 config_params.get("validation_metric"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
472 )
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
473
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
474 conf: Dict[str, Any] = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
475 "model_type": "ecd",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
476 "input_features": [image_feat],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
477 "output_features": [output_feat],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
478 "combiner": {"type": "concat"},
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
479 "trainer": {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
480 "epochs": epochs,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
481 "early_stop": early_stop,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
482 "batch_size": batch_size_cfg,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
483 "learning_rate": learning_rate,
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
484 # set validation_metric when provided
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
485 **({"validation_metric": val_metric} if val_metric else {}),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
486 },
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
487 "preprocessing": {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
488 "split": split_config,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
489 "num_processes": num_processes,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
490 "in_memory": False,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
491 },
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
492 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
493
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
494 logger.debug("LudwigDirectBackend: Config dict built.")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
495 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
496 yaml_str = yaml.dump(conf, sort_keys=False, indent=2)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
497 logger.info("LudwigDirectBackend: YAML config generated.")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
498 return yaml_str
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
499 except Exception:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
500 logger.error(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
501 "LudwigDirectBackend: Failed to serialize YAML.",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
502 exc_info=True,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
503 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
504 raise
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
505
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
506 def _patch_torchvision_download(self) -> None:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
507 """
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
508 Torchvision weight downloads sometimes fail checksum validation behind
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
509 corporate proxies that rewrite binaries. Skip hash checking to allow
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
510 pre-trained weights to load in those environments.
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
511 """
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
512 if LudwigDirectBackend._torchvision_patched:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
513 return
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
514 try:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
515 import torch.hub as torch_hub
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
516
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
517 original = torch_hub.load_state_dict_from_url
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
518 original_download = torch_hub.download_url_to_file
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
519
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
520 def _no_hash(url, model_dir=None, map_location=None, progress=True, check_hash=False, file_name=None):
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
521 return original(
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
522 url,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
523 model_dir=model_dir,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
524 map_location=map_location,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
525 progress=progress,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
526 check_hash=False,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
527 file_name=file_name,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
528 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
529
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
530 def _download_no_hash(url, dst, hash_prefix=None, progress=True):
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
531 # Torchvision's download_url_to_file signature does not accept check_hash in older versions.
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
532 return original_download(url, dst, hash_prefix=None, progress=progress)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
533
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
534 torch_hub.load_state_dict_from_url = _no_hash # type: ignore[assignment]
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
535 torch_hub.download_url_to_file = _download_no_hash # type: ignore[assignment]
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
536 LudwigDirectBackend._torchvision_patched = True
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
537 logger.info("Disabled torchvision weight hash verification to avoid proxy-corrupted downloads.")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
538 except Exception as exc:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
539 logger.warning(f"Could not patch torchvision download hash check: {exc}")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
540
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
541 def run_experiment(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
542 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
543 dataset_path: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
544 config_path: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
545 output_dir: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
546 random_seed: int = 42,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
547 ) -> None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
548 """Invoke Ludwig's internal experiment_cli function to run the experiment."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
549 logger.info("LudwigDirectBackend: Starting experiment execution.")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
550
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
551 # Avoid strict hash validation for torchvision weights (common in proxied environments)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
552 self._patch_torchvision_download()
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
553
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
554 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
555 from ludwig.experiment import experiment_cli
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
556 except ImportError as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
557 logger.error(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
558 "LudwigDirectBackend: Could not import experiment_cli.",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
559 exc_info=True,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
560 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
561 raise RuntimeError("Ludwig import failed.") from e
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
562
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
563 output_dir.mkdir(parents=True, exist_ok=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
564
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
565 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
566 experiment_cli(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
567 dataset=str(dataset_path),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
568 config=str(config_path),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
569 output_directory=str(output_dir),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
570 random_seed=random_seed,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
571 skip_preprocessing=True,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
572 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
573 logger.info(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
574 f"LudwigDirectBackend: Experiment completed. Results in {output_dir}"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
575 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
576 except TypeError as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
577 logger.error(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
578 "LudwigDirectBackend: Argument mismatch in experiment_cli call.",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
579 exc_info=True,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
580 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
581 raise RuntimeError("Ludwig argument error.") from e
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
582 except Exception:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
583 logger.error(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
584 "LudwigDirectBackend: Experiment execution error.",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
585 exc_info=True,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
586 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
587 raise
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
588
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
589 def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
590 """Retrieve the learning rate used in the most recent Ludwig run."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
591 output_dir = Path(output_dir)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
592 exp_dirs = sorted(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
593 output_dir.glob("experiment_run*"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
594 key=lambda p: p.stat().st_mtime,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
595 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
596
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
597 if not exp_dirs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
598 logger.warning(f"No experiment run directories found in {output_dir}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
599 return None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
600
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
601 progress_file = exp_dirs[-1] / "model" / "training_progress.json"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
602 if not progress_file.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
603 logger.warning(f"No training_progress.json found in {progress_file}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
604 return None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
605
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
606 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
607 with progress_file.open("r", encoding="utf-8") as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
608 data = json.load(f)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
609 return {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
610 "learning_rate": data.get("learning_rate"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
611 "batch_size": data.get("batch_size"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
612 "epoch": data.get("epoch"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
613 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
614 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
615 logger.warning(f"Failed to read training progress info: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
616 return {}
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
617
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
618 def convert_parquet_to_csv(self, output_dir: Path):
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
619 """Convert the predictions Parquet file to CSV."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
620 output_dir = Path(output_dir)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
621 exp_dirs = sorted(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
622 output_dir.glob("experiment_run*"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
623 key=lambda p: p.stat().st_mtime,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
624 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
625 if not exp_dirs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
626 logger.warning(f"No experiment run dirs found in {output_dir}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
627 return
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
628 exp_dir = exp_dirs[-1]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
629 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
630 csv_path = exp_dir / "predictions.csv"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
631
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
632 # Check if parquet file exists before trying to convert
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
633 if not parquet_path.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
634 logger.info(f"Predictions parquet file not found at {parquet_path}, skipping conversion")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
635 return
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
636
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
637 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
638 df = pd.read_parquet(parquet_path)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
639 df.to_csv(csv_path, index=False)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
640 logger.info(f"Converted Parquet to CSV: {csv_path}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
641 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
642 logger.error(f"Error converting Parquet to CSV: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
643
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
644 def generate_plots(self, output_dir: Path) -> None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
645 """Generate all registered Ludwig visualizations for the latest experiment run."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
646 logger.info("Generating all Ludwig visualizations…")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
647
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
648 # Keep only lightweight plots (drop compare_performance/roc_curves)
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
649 test_plots = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
650 "roc_curves_from_test_statistics",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
651 "confusion_matrix",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
652 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
653 train_plots = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
654 "learning_curves",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
655 "compare_classifiers_performance_subset",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
656 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
657
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
658 output_dir = Path(output_dir)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
659 exp_dirs = sorted(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
660 output_dir.glob("experiment_run*"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
661 key=lambda p: p.stat().st_mtime,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
662 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
663 if not exp_dirs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
664 logger.warning(f"No experiment run dirs found in {output_dir}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
665 return
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
666 exp_dir = exp_dirs[-1]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
667
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
668 viz_dir = exp_dir / "visualizations"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
669 viz_dir.mkdir(exist_ok=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
670 train_viz = viz_dir / "train"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
671 test_viz = viz_dir / "test"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
672 train_viz.mkdir(parents=True, exist_ok=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
673 test_viz.mkdir(parents=True, exist_ok=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
674
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
675 def _check(p: Path) -> Optional[str]:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
676 return str(p) if p.exists() else None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
677
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
678 training_stats = _check(exp_dir / "training_statistics.json")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
679 test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
680 probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
681 gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
682
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
683 dataset_path = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
684 split_file = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
685 desc = exp_dir / DESCRIPTION_FILE_NAME
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
686 if desc.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
687 with open(desc, "r") as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
688 cfg = json.load(f)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
689 dataset_path = _check(Path(cfg.get("dataset", "")))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
690 split_file = _check(Path(get_split_path(cfg.get("dataset", ""))))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
691
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
692 output_feature = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
693 if desc.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
694 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
695 output_feature = cfg["config"]["output_features"][0]["name"]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
696 except Exception:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
697 pass
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
698 if not output_feature and test_stats:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
699 with open(test_stats, "r") as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
700 stats = json.load(f)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
701 output_feature = next(iter(stats.keys()), "")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
702
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
703 viz_registry = get_visualizations_registry()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
704 for viz_name, viz_func in viz_registry.items():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
705 if viz_name in train_plots:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
706 viz_dir_plot = train_viz
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
707 elif viz_name in test_plots:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
708 viz_dir_plot = test_viz
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
709 else:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
710 continue
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
711
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
712 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
713 viz_func(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
714 training_statistics=[training_stats] if training_stats else [],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
715 test_statistics=[test_stats] if test_stats else [],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
716 probabilities=[probs_path] if probs_path else [],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
717 output_feature_name=output_feature,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
718 ground_truth_split=2,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
719 top_n_classes=[0],
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
720 top_k=3,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
721 ground_truth_metadata=gt_metadata,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
722 ground_truth=dataset_path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
723 split_file=split_file,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
724 output_directory=str(viz_dir_plot),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
725 normalize=False,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
726 file_format="png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
727 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
728 logger.info(f"✔ Generated {viz_name}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
729 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
730 logger.warning(f"✘ Skipped {viz_name}: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
731
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
732 logger.info(f"All visualizations written to {viz_dir}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
733
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
734 def generate_html_report(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
735 self,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
736 title: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
737 output_dir: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
738 config: dict,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
739 split_info: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
740 ) -> Path:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
741 """Assemble an HTML report from visualizations under train_val/ and test/ folders."""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
742 cwd = Path.cwd()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
743 report_name = title.lower().replace(" ", "_") + "_report.html"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
744 report_path = cwd / report_name
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
745 output_dir = Path(output_dir)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
746 output_type = None
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
747
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
748 exp_dirs = sorted(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
749 output_dir.glob("experiment_run*"),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
750 key=lambda p: p.stat().st_mtime,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
751 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
752 if not exp_dirs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
753 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
754 exp_dir = exp_dirs[-1]
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
755 train_set_metadata_path = exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
756 label_metadata_path = config.get("label_column_data_path")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
757 if label_metadata_path:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
758 label_metadata_path = Path(label_metadata_path)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
759
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
760 # Pull additional config details from description.json if available
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
761 config_for_summary = dict(config)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
762 if "target_column" not in config_for_summary or not config_for_summary.get("target_column"):
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
763 config_for_summary["target_column"] = LABEL_COLUMN_NAME
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
764 desc_path = exp_dir / DESCRIPTION_FILE_NAME
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
765 if desc_path.exists():
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
766 try:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
767 with open(desc_path, "r") as f:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
768 desc_cfg = json.load(f).get("config", {})
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
769 encoder_cfg = (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
770 desc_cfg.get("input_features", [{}])[0].get("encoder", {})
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
771 if isinstance(desc_cfg.get("input_features", [{}]), list)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
772 else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
773 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
774 output_cfg = (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
775 desc_cfg.get("output_features", [{}])[0]
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
776 if isinstance(desc_cfg.get("output_features", [{}]), list)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
777 else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
778 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
779 trainer_cfg = desc_cfg.get("trainer", {}) if isinstance(desc_cfg, dict) else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
780 loss_cfg = output_cfg.get("loss", {}) if isinstance(output_cfg, dict) else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
781 opt_cfg = trainer_cfg.get("optimizer", {}) if isinstance(trainer_cfg, dict) else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
782 clip_cfg = trainer_cfg.get("gradient_clipping", {}) if isinstance(trainer_cfg, dict) else {}
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
783
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
784 arch_type = encoder_cfg.get("type")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
785 arch_variant = encoder_cfg.get("model_variant")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
786 arch_name = None
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
787 if arch_type:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
788 arch_base = str(arch_type).replace("_", " ").title()
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
789 arch_name = f"{arch_base} {arch_variant}" if arch_variant is not None else arch_base
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
790
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
791 summary_fields = {
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
792 "architecture": arch_name,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
793 "model_variant": arch_variant,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
794 "pretrained": encoder_cfg.get("use_pretrained"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
795 "trainable": encoder_cfg.get("trainable"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
796 "target_column": output_cfg.get("column"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
797 "task_type": output_cfg.get("type"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
798 "validation_metric": trainer_cfg.get("validation_metric"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
799 "loss_function": loss_cfg.get("type"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
800 "threshold": output_cfg.get("threshold"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
801 "total_epochs": trainer_cfg.get("epochs"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
802 "early_stop": trainer_cfg.get("early_stop"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
803 "batch_size": trainer_cfg.get("batch_size"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
804 "optimizer": opt_cfg.get("type"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
805 "learning_rate": trainer_cfg.get("learning_rate"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
806 "random_seed": desc_cfg.get("random_seed") or config.get("random_seed"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
807 "use_mixed_precision": trainer_cfg.get("use_mixed_precision"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
808 "gradient_clipping": clip_cfg.get("clipglobalnorm"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
809 }
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
810 for k, v in summary_fields.items():
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
811 if v is None:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
812 continue
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
813 # Do not override user-passed target/image column names in config
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
814 if k in {"target_column", "image_column"} and config_for_summary.get(k):
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
815 continue
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
816 config_for_summary.setdefault(k, v)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
817 except Exception as e: # pragma: no cover - defensive
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
818 logger.warning(f"Could not merge description.json into config summary: {e}")
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
819
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
820 base_viz_dir = exp_dir / "visualizations"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
821 train_viz_dir = base_viz_dir / "train"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
822 test_viz_dir = base_viz_dir / "test"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
823
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
824 html = get_html_template()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
825
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
826 # Extra CSS & JS: center Plotly and enable CSV download for predictions table
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
827 html += """
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
828 <style>
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
829 /* Center Plotly figures (both wrapper and native classes) */
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
830 .plotly-center { display: flex; justify-content: center; }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
831 .plotly-center .plotly-graph-div, .plotly-center .js-plotly-plot { margin: 0 auto !important; }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
832 .js-plotly-plot, .plotly-graph-div { margin-left: auto !important; margin-right: auto !important; }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
833
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
834 /* Download button for predictions table */
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
835 .download-btn {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
836 padding: 8px 12px;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
837 border: 1px solid #4CAF50;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
838 background: #4CAF50;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
839 color: white;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
840 border-radius: 6px;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
841 cursor: pointer;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
842 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
843 .download-btn:hover { filter: brightness(0.95); }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
844 .preds-controls {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
845 display: flex;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
846 justify-content: flex-end;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
847 gap: 8px;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
848 margin: 8px 0;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
849 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
850 </style>
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
851 <script>
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
852 function tableToCSV(table){
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
853 const rows = Array.from(table.querySelectorAll('tr'));
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
854 return rows.map(row =>
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
855 Array.from(row.querySelectorAll('th,td')).map(cell => {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
856 let text = cell.innerText.replace(/\\r?\\n|\\r/g,' ').trim();
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
857 if (text.includes('"') || text.includes(',')) {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
858 text = '"' + text.replace(/"/g,'""') + '"';
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
859 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
860 return text;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
861 }).join(',')
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
862 ).join('\\n');
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
863 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
864 document.addEventListener('DOMContentLoaded', function(){
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
865 const btn = document.getElementById('downloadPredsCsv');
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
866 if(btn){
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
867 btn.addEventListener('click', function(){
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
868 const tbl = document.querySelector('.predictions-table');
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
869 if(!tbl){ alert('Predictions table not found.'); return; }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
870 const csv = tableToCSV(tbl);
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
871 const blob = new Blob([csv], {type: 'text/csv;charset=utf-8;'});
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
872 const url = URL.createObjectURL(blob);
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
873 const a = document.createElement('a');
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
874 a.href = url;
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
875 a.download = 'ground_truth_vs_predictions.csv';
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
876 document.body.appendChild(a);
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
877 a.click();
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
878 document.body.removeChild(a);
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
879 URL.revokeObjectURL(url);
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
880 });
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
881 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
882 });
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
883 </script>
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
884 """
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
885 html += f"<h1>{title}</h1>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
886
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
887 metrics_html = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
888 train_val_metrics_html = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
889 test_metrics_html = ""
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
890 output_type = None
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
891 train_stats_path = exp_dir / "training_statistics.json"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
892 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
893 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
894 if train_stats_path.exists() and test_stats_path.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
895 with open(train_stats_path) as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
896 train_stats = json.load(f)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
897 with open(test_stats_path) as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
898 test_stats = json.load(f)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
899 output_type = detect_output_type(test_stats)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
900 metrics_html = format_stats_table_html(train_stats, test_stats, output_type)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
901 train_val_metrics_html = format_train_val_stats_table_html(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
902 train_stats, test_stats
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
903 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
904 test_metrics_html = format_test_merged_stats_table_html(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
905 extract_metrics_from_json(train_stats, test_stats, output_type)[
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
906 "test"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
907 ], output_type
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
908 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
909 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
910 logger.warning(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
911 f"Could not load stats for HTML report: {type(e).__name__}: {e}"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
912 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
913
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
914 config_html = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
915 training_progress = self.get_training_process(output_dir)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
916 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
917 config_html = format_config_table_html(
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
918 config_for_summary, split_info, training_progress, output_type
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
919 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
920 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
921 logger.warning(f"Could not load config for HTML report: {e}")
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
922 config_html = (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
923 "<h2 style='text-align: center;'>Model and Training Summary</h2>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
924 "<p style='text-align:center; color:#666;'>Configuration details unavailable.</p>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
925 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
926 if not config_html:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
927 config_html = (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
928 "<h2 style='text-align: center;'>Model and Training Summary</h2>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
929 "<p style='text-align:center; color:#666;'>No configuration details found.</p>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
930 )
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
931
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
932 # ---------- image rendering with exclusions ----------
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
933 def render_img_section(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
934 title: str,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
935 dir_path: Path,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
936 output_type: str = None,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
937 exclude_names: Optional[set] = None,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
938 ) -> str:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
939 if not dir_path.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
940 return f"<h2>{title}</h2><p><em>Directory not found.</em></p>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
941
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
942 exclude_names = exclude_names or set()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
943
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
944 imgs = list(dir_path.glob("*.png"))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
945
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
946 # Exclude ROC curves and standard confusion matrices (keep only entropy version)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
947 default_exclude = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
948 # "roc_curves.png", # Remove ROC curves from test tab
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
949 "confusion_matrix__label_top5.png", # Remove standard confusion matrix
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
950 "confusion_matrix__label_top10.png", # Remove duplicate
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
951 "confusion_matrix__label_top6.png", # Remove duplicate
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
952 "confusion_matrix_entropy__label_top10.png", # Keep only top5
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
953 "confusion_matrix_entropy__label_top6.png", # Keep only top5
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
954 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
955 title_is_test = title.lower().startswith("test")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
956 if title_is_test and output_type == "binary":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
957 default_exclude.update(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
958 {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
959 "confusion_matrix__label_top2.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
960 "confusion_matrix_entropy__label_top2.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
961 "roc_curves_from_prediction_statistics.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
962 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
963 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
964 elif title_is_test and output_type == "category":
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
965 default_exclude.update(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
966 {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
967 "compare_classifiers_multiclass_multimetric__label_best10.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
968 "compare_classifiers_multiclass_multimetric__label_sorted.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
969 "compare_classifiers_multiclass_multimetric__label_worst10.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
970 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
971 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
972
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
973 imgs = [
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
974 img
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
975 for img in imgs
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
976 if img.name not in default_exclude
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
977 and img.name not in exclude_names
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
978 and not (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
979 "learning_curves" in img.stem
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
980 and "loss" in img.stem
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
981 and "label" in img.stem
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
982 )
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
983 ]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
984
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
985 if not imgs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
986 return f"<h2>{title}</h2><p><em>No plots found.</em></p>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
987
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
988 # Sort images by name for consistent ordering (works with string and numeric labels)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
989 imgs = sorted(imgs, key=lambda x: x.name)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
990
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
991 html_section = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
992 custom_titles = {
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
993 "compare_classifiers_multiclass_multimetric__label_top10": "Metric Comparison by Label",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
994 "compare_classifiers_performance_from_prob": "Label Metric Comparison by Probability",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
995 }
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
996 for img in imgs:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
997 b64 = encode_image_to_base64(str(img))
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
998 default_title = img.stem.replace("_", " ").title()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
999 img_title = custom_titles.get(img.stem, default_title)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1000 html_section += (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1001 f"<h2 style='text-align: center;'>{img_title}</h2>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1002 f'<div class="plot" style="margin-bottom:20px;text-align:center;">'
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1003 f'<img src="data:image/png;base64,{b64}" '
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1004 f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />'
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1005 f"</div>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1006 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1007 return html_section
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1008
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1009 # Show performance first, then config
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1010 tab1_content = metrics_html + config_html
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1011
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1012 tab2_content = train_val_metrics_html + render_img_section(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1013 "Training and Validation Visualizations",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1014 train_viz_dir,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1015 output_type,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1016 exclude_names={
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1017 "compare_classifiers_performance_from_prob.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1018 "roc_curves_from_prediction_statistics.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1019 "precision_recall_curves_from_prediction_statistics.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1020 "precision_recall_curve.png",
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1021 },
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1022 )
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1023 if train_stats_path.exists():
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1024 try:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1025 if output_type == "regression":
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1026 tv_plots = build_regression_train_val_plots(str(train_stats_path))
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1027 else:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1028 tv_plots = build_train_validation_plots(str(train_stats_path))
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1029 for plot in tv_plots:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1030 tab2_content += (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1031 f"<h2 style='text-align: center;'>{plot['title']}</h2>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1032 f"<div class='plotly-center'>{plot['html']}</div>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1033 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1034 if tv_plots:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1035 logger.info(f"Generated {len(tv_plots)} train/val diagnostic plots")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1036 except Exception as e:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1037 logger.warning(f"Could not generate train/val plots: {e}")
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1038
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1039 # --- Predictions vs Ground Truth table (REGRESSION ONLY) ---
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1040 preds_section = ""
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1041 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1042 if output_type == "regression" and parquet_path.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1043 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1044 # 1) load predictions from Parquet
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1045 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1046 # assume the column containing your model's prediction is named "prediction"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1047 # or contains that substring:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1048 pred_col = next(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1049 (c for c in df_preds.columns if "prediction" in c.lower()),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1050 None,
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1051 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1052 if pred_col is None:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1053 raise ValueError("No prediction column found in Parquet output")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1054 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"})
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1055
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1056 # 2) load ground truth for the test split from prepared CSV
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1057 df_all = pd.read_csv(config["label_column_data_path"])
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1058 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1059 LABEL_COLUMN_NAME
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1060 ].reset_index(drop=True)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1061 # 3) concatenate side-by-side
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1062 df_table = pd.concat([df_gt, df_pred], axis=1)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1063 df_table.columns = [LABEL_COLUMN_NAME, "prediction"]
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1064
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1065 # 4) render as HTML
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1066 preds_html = df_table.to_html(index=False, classes="predictions-table")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1067 preds_section = (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1068 "<h2 style='text-align: center;'>Ground Truth vs. Predictions</h2>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1069 "<div class='preds-controls'>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1070 "<button id='downloadPredsCsv' class='download-btn'>Download CSV</button>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1071 "</div>"
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1072 "<div class='scroll-rows-30' style='overflow-x:auto; overflow-y:auto; max-height:350px; margin-bottom:20px;'>"
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1073 + preds_html
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1074 + "</div>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1075 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1076 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1077 logger.warning(f"Could not build Predictions vs GT table: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1078
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1079 tab3_content = test_metrics_html + preds_section
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1080 test_plotly_added = False
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1081
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1082 if output_type == "regression" and train_stats_path.exists():
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1083 try:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1084 test_plots = build_regression_test_plots(str(train_stats_path))
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1085 for plot in test_plots:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1086 tab3_content += (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1087 f"<h2 style='text-align: center;'>{plot['title']}</h2>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1088 f"<div class='plotly-center'>{plot['html']}</div>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1089 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1090 if test_plots:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1091 test_plotly_added = True
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1092 logger.info(f"Generated {len(test_plots)} regression test plots")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1093 except Exception as e:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1094 logger.warning(f"Could not generate regression test plots: {e}")
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1095
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1096 if output_type in ("binary", "category") and test_stats_path.exists():
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1097 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1098 interactive_plots = build_classification_plots(
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1099 str(test_stats_path),
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1100 str(train_stats_path) if train_stats_path.exists() else None,
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1101 metadata_csv_path=str(label_metadata_path)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1102 if label_metadata_path and label_metadata_path.exists()
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1103 else None,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1104 train_set_metadata_path=str(train_set_metadata_path)
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1105 if train_set_metadata_path.exists()
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1106 else None,
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1107 )
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1108 for plot in interactive_plots:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1109 tab3_content += (
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1110 f"<h2 style='text-align: center;'>{plot['title']}</h2>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1111 f"<div class='plotly-center'>{plot['html']}</div>"
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1112 )
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1113 if interactive_plots:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1114 test_plotly_added = True
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1115 logger.info(f"Generated {len(interactive_plots)} interactive Plotly plots")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1116 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1117 logger.warning(f"Could not generate Plotly plots: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1118
15
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1119 # Add prediction diagnostics from predictions.csv
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1120 predictions_csv_path = exp_dir / "predictions.csv"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1121 try:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1122 diag_plots = build_prediction_diagnostics(
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1123 str(predictions_csv_path),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1124 label_data_path=str(config.get("label_column_data_path"))
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1125 if config.get("label_column_data_path")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1126 else None,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1127 threshold=config.get("threshold"),
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1128 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1129 for plot in diag_plots:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1130 tab3_content += (
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1131 f"<h2 style='text-align: center;'>{plot['title']}</h2>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1132 f"<div class='plotly-center'>{plot['html']}</div>"
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1133 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1134 if diag_plots:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1135 test_plotly_added = True
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1136 logger.info(f"Generated {len(diag_plots)} prediction diagnostic plots")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1137 except Exception as e:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1138 logger.warning(f"Could not generate prediction diagnostics: {e}")
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1139
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1140 # Fallback: include static PNGs if no interactive plots were added
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1141 if not test_plotly_added:
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1142 tab3_content += render_img_section(
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1143 "Test Visualizations (PNG fallback)",
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1144 test_viz_dir,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1145 output_type,
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1146 )
d17e3a1b8659 planemo upload for repository https://github.com/goeckslab/gleam.git commit bc50fef8acb44aca15d0a1746e6c0c967da5bb17
goeckslab
parents: 12
diff changeset
1147
12
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1148 # Add static TEST PNGs (with default dedupe/exclusions)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1149 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1150 modal_html = get_metrics_help_modal()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1151 html += tabbed_html + modal_html + get_html_closing()
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1152
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1153 try:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1154 with open(report_path, "w") as f:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1155 f.write(html)
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1156 logger.info(f"HTML report generated at: {report_path}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1157 except Exception as e:
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1158 logger.error(f"Failed to write HTML report: {e}")
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1159 raise
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1160
bcfa2e234a80 planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
goeckslab
parents:
diff changeset
1161 return report_path