Mercurial > repos > goeckslab > image_learner
annotate image_learner_cli.py @ 11:c5150cceab47 draft default tip
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
| author | goeckslab | 
|---|---|
| date | Sat, 18 Oct 2025 03:17:09 +0000 | 
| parents | b0d893d04d4c | 
| children | 
| rev | line source | 
|---|---|
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1 import argparse | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2 import json | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 3 import logging | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 4 import os | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 5 import shutil | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 6 import sys | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 7 import tempfile | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 8 import zipfile | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 9 from pathlib import Path | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 10 from typing import Any, Dict, Optional, Protocol, Tuple | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 11 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 12 import matplotlib | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 13 import numpy as np | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 14 import pandas as pd | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 15 import pandas.api.types as ptypes | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 16 import yaml | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 17 from constants import ( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 18 IMAGE_PATH_COLUMN_NAME, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 19 LABEL_COLUMN_NAME, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 20 METRIC_DISPLAY_NAMES, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 21 MODEL_ENCODER_TEMPLATES, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 22 SPLIT_COLUMN_NAME, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 23 TEMP_CONFIG_FILENAME, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 24 TEMP_CSV_FILENAME, | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 25 TEMP_DIR_PREFIX, | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 26 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 27 from ludwig.globals import ( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 28 DESCRIPTION_FILE_NAME, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 29 PREDICTIONS_PARQUET_FILE_NAME, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 30 TEST_STATISTICS_FILE_NAME, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 31 TRAIN_SET_METADATA_FILE_NAME, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 32 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 33 from ludwig.utils.data_utils import get_split_path | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 34 from plotly_plots import build_classification_plots | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 35 from sklearn.model_selection import train_test_split | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 36 from utils import ( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 37 build_tabbed_html, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 38 encode_image_to_base64, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 39 get_html_closing, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 40 get_html_template, | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 41 get_metrics_help_modal, | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 42 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 43 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 44 # Set matplotlib backend after imports | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 45 matplotlib.use('Agg') | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 46 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 47 # --- Logging Setup --- | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 48 logging.basicConfig( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 49 level=logging.INFO, | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 50 format="%(asctime)s %(levelname)s %(name)s: %(message)s", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 51 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 52 logger = logging.getLogger("ImageLearner") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 53 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 54 # Optional MetaFormer configuration registry | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 55 META_DEFAULT_CFGS: Dict[str, Any] = {} | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 56 try: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 57 from MetaFormer import default_cfgs as META_DEFAULT_CFGS # type: ignore[attr-defined] | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 58 except Exception as e: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 59 logger.debug("MetaFormer default configs unavailable: %s", e) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 60 META_DEFAULT_CFGS = {} | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 61 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 62 # Try to import Ludwig visualization registry (may fail due to optional dependencies) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 63 # This must come AFTER logger is defined | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 64 _ludwig_viz_available = False | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 65 get_visualizations_registry = None | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 66 try: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 67 from ludwig.visualize import get_visualizations_registry | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 68 _ludwig_viz_available = True | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 69 logger.info("Ludwig visualizations available") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 70 except ImportError as e: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 71 logger.warning(f"Ludwig visualizations not available: {e}. Will use fallback plots only.") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 72 except Exception as e: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 73 logger.warning(f"Ludwig visualizations not available due to dependency issues: {e}. Will use fallback plots only.") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 74 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 75 # --- MetaFormer patching integration --- | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 76 _metaformer_patch_ok = False | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 77 try: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 78 from MetaFormer.metaformer_stacked_cnn import patch_ludwig_stacked_cnn as _mf_patch | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 79 if _mf_patch(): | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 80 _metaformer_patch_ok = True | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 81 logger.info("MetaFormer patching applied for Ludwig stacked_cnn encoder.") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 82 except Exception as e: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 83 logger.warning(f"MetaFormer stacked CNN not available: {e}") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 84 _metaformer_patch_ok = False | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 85 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 86 # Note: CAFormer models are now handled through MetaFormer framework | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 87 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 88 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 89 def format_config_table_html( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 90 config: dict, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 91 split_info: Optional[str] = None, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 92 training_progress: dict = None, | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 93 output_type: Optional[str] = None, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 94 ) -> str: | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 95 display_keys = [ | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 96 "task_type", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 97 "model_name", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 98 "epochs", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 99 "batch_size", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 100 "fine_tune", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 101 "use_pretrained", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 102 "learning_rate", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 103 "random_seed", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 104 "early_stop", | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 105 "threshold", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 106 ] | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 107 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 108 rows = [] | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 109 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 110 for key in display_keys: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 111 val = config.get(key, None) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 112 if key == "threshold": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 113 if output_type != "binary": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 114 continue | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 115 val = val if val is not None else 0.5 | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 116 val_str = f"{val:.2f}" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 117 if val == 0.5: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 118 val_str += " (default)" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 119 else: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 120 if key == "task_type": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 121 val_str = val.title() if isinstance(val, str) else "N/A" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 122 elif key == "batch_size": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 123 if val is not None: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 124 val_str = int(val) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 125 else: | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 126 val = "auto" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 127 val_str = "auto" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 128 resolved_val = None | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 129 if val is None or val == "auto": | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 130 if training_progress: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 131 resolved_val = training_progress.get("batch_size") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 132 val = ( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 133 "Auto-selected batch size by Ludwig:<br>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 134 f"<span style='font-size: 0.85em;'>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 135 f"{resolved_val if resolved_val else val}</span><br>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 136 "<span style='font-size: 0.85em;'>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 137 "Based on model architecture and training setup " | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 138 "(e.g., fine-tuning).<br>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 139 "See <a href='https://ludwig.ai/latest/configuration/trainer/" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 140 "#trainer-parameters' target='_blank'>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 141 "Ludwig Trainer Parameters</a> for details." | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 142 "</span>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 143 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 144 else: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 145 val = ( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 146 "Auto-selected by Ludwig<br>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 147 "<span style='font-size: 0.85em;'>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 148 "Automatically tuned based on architecture and dataset.<br>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 149 "See <a href='https://ludwig.ai/latest/configuration/trainer/" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 150 "#trainer-parameters' target='_blank'>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 151 "Ludwig Trainer Parameters</a> for details." | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 152 "</span>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 153 ) | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 154 elif key == "learning_rate": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 155 if val is not None and val != "auto": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 156 val_str = f"{val:.6f}" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 157 else: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 158 if training_progress: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 159 resolved_val = training_progress.get("learning_rate") | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 160 val_str = ( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 161 "Auto-selected learning rate by Ludwig:<br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 162 f"<span style='font-size: 0.85em;'>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 163 f"{resolved_val if resolved_val else 'auto'}</span><br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 164 "<span style='font-size: 0.85em;'>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 165 "Based on model architecture and training setup " | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 166 "(e.g., fine-tuning).<br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 167 "</span>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 168 ) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 169 else: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 170 val_str = ( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 171 "Auto-selected by Ludwig<br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 172 "<span style='font-size: 0.85em;'>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 173 "Automatically tuned based on architecture and dataset.<br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 174 "See <a href='https://ludwig.ai/latest/configuration/trainer/" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 175 "#trainer-parameters' target='_blank'>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 176 "Ludwig Trainer Parameters</a> for details." | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 177 "</span>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 178 ) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 179 elif key == "epochs": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 180 if val is None: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 181 val_str = "N/A" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 182 else: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 183 if ( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 184 training_progress | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 185 and "epoch" in training_progress | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 186 and val > training_progress["epoch"] | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 187 ): | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 188 val_str = ( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 189 f"Because of early stopping: the training " | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 190 f"stopped at epoch {training_progress['epoch']}" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 191 ) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 192 else: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 193 val_str = val | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 194 else: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 195 val_str = val if val is not None else "N/A" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 196 if val_str == "N/A" and key not in ["task_type"]: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 197 continue | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 198 rows.append( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 199 f"<tr>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 200 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 201 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 202 f"{key.replace('_', ' ').title()}</td>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 203 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 204 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>" | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 205 f"{val_str}</td>" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 206 f"</tr>" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 207 ) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 208 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 209 aug_cfg = config.get("augmentation") | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 210 if aug_cfg: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 211 types = [str(a.get("type", "")) for a in aug_cfg] | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 212 aug_val = ", ".join(types) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 213 rows.append( | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 214 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 215 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Augmentation</td>" | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 216 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 217 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{aug_val}</td></tr>" | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 218 ) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 219 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 220 if split_info: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 221 rows.append( | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 222 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 223 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Data Split</td>" | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 224 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 225 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{split_info}</td></tr>" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 226 ) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 227 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 228 html = f""" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 229 <h2 style="text-align: center;">Model and Training Summary</h2> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 230 <div style="display: flex; justify-content: center;"> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 231 <table style="border-collapse: collapse; width: 100%; table-layout: fixed;"> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 232 <thead><tr> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 233 <th style="padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Parameter</th> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 234 <th style="padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Value</th> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 235 </tr></thead> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 236 <tbody> | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 237 {"".join(rows)} | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 238 </tbody> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 239 </table> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 240 </div><br> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 241 <p style="text-align: center; font-size: 0.9em;"> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 242 Model trained using <a href="https://ludwig.ai/" target="_blank" rel="noopener noreferrer">Ludwig</a>. | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 243 <a href="https://ludwig.ai/latest/configuration/" target="_blank" rel="noopener noreferrer"> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 244 Ludwig documentation provides detailed information about default model and training parameters | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 245 </a> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 246 </p><hr> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 247 """ | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 248 return html | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 249 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 250 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
def detect_output_type(test_stats):
    """Infer the output type from test statistics.

    Args:
        test_stats: Mapping of test statistics; only the entry stored under
            the ``"label"`` key is inspected. ``None`` or a missing/empty
            ``"label"`` entry is tolerated.

    Returns:
        ``"regression"`` if the label statistics contain a
        ``"mean_squared_error"`` key; ``"binary"`` if ``"per_class_stats"``
        holds exactly two entries; ``"category"`` in every other case
        (including when no usable statistics are available).
    """
    # `or {}` also covers explicit None values (e.g. JSON null), which a
    # plain .get(key, {}) default would let through and then crash on.
    label_stats = (test_stats or {}).get("label") or {}

    # Checked first: the presence of this key decides "regression" before
    # any per-class statistics are looked at.
    if "mean_squared_error" in label_stats:
        return "regression"

    per_class = label_stats.get("per_class_stats") or {}
    return "binary" if len(per_class) == 2 else "category"
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 260 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 261 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 262 def extract_metrics_from_json( | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 263 train_stats: dict, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 264 test_stats: dict, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 265 output_type: str, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 266 ) -> dict: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 267 """Extracts relevant metrics from training and test statistics based on the output type.""" | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 268 metrics = {"training": {}, "validation": {}, "test": {}} | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 269 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 270 def get_last_value(stats, key): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 271 val = stats.get(key) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 272 if isinstance(val, list) and val: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 273 return val[-1] | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 274 elif isinstance(val, (int, float)): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 275 return val | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 276 return None | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 277 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 278 for split in ["training", "validation"]: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 279 split_stats = train_stats.get(split, {}) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 280 if not split_stats: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 281 logging.warning(f"No statistics found for {split} split") | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 282 continue | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 283 label_stats = split_stats.get("label", {}) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 284 if not label_stats: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 285 logging.warning(f"No label statistics found for {split} split") | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 286 continue | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 287 if output_type == "binary": | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 288 metrics[split] = { | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 289 "accuracy": get_last_value(label_stats, "accuracy"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 290 "loss": get_last_value(label_stats, "loss"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 291 "precision": get_last_value(label_stats, "precision"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 292 "recall": get_last_value(label_stats, "recall"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 293 "specificity": get_last_value(label_stats, "specificity"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 294 "roc_auc": get_last_value(label_stats, "roc_auc"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 295 } | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 296 elif output_type == "regression": | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 297 metrics[split] = { | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 298 "loss": get_last_value(label_stats, "loss"), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 299 "mean_absolute_error": get_last_value( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 300 label_stats, "mean_absolute_error" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 301 ), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 302 "mean_absolute_percentage_error": get_last_value( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 303 label_stats, "mean_absolute_percentage_error" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 304 ), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 305 "mean_squared_error": get_last_value(label_stats, "mean_squared_error"), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 306 "root_mean_squared_error": get_last_value( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 307 label_stats, "root_mean_squared_error" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 308 ), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 309 "root_mean_squared_percentage_error": get_last_value( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 310 label_stats, "root_mean_squared_percentage_error" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 311 ), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 312 "r2": get_last_value(label_stats, "r2"), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 313 } | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 314 else: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 315 metrics[split] = { | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 316 "accuracy": get_last_value(label_stats, "accuracy"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 317 "accuracy_micro": get_last_value(label_stats, "accuracy_micro"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 318 "loss": get_last_value(label_stats, "loss"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 319 "roc_auc": get_last_value(label_stats, "roc_auc"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 320 "hits_at_k": get_last_value(label_stats, "hits_at_k"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 321 } | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 322 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 323 # Test metrics: dynamic extraction according to exclusions | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 324 test_label_stats = test_stats.get("label", {}) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 325 if not test_label_stats: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 326 logging.warning("No label statistics found for test split") | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 327 else: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 328 combined_stats = test_stats.get("combined", {}) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 329 overall_stats = test_label_stats.get("overall_stats", {}) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 330 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 331 # Define exclusions | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 332 if output_type == "binary": | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 333 exclude = {"per_class_stats", "precision_recall_curve", "roc_curve"} | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 334 else: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 335 exclude = {"per_class_stats", "confusion_matrix"} | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 336 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 337 # 1. Get all scalar test_label_stats not excluded | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 338 test_metrics = {} | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 339 for k, v in test_label_stats.items(): | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 340 if k in exclude: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 341 continue | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 342 if k == "overall_stats": | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 343 continue | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 344 if isinstance(v, (int, float, str, bool)): | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 345 test_metrics[k] = v | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 346 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 347 # 2. Add overall_stats (flattened) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 348 for k, v in overall_stats.items(): | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 349 test_metrics[k] = v | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 350 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 351 # 3. Optionally include combined/loss if present and not already | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 352 if "loss" in combined_stats and "loss" not in test_metrics: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 353 test_metrics["loss"] = combined_stats["loss"] | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 354 metrics["test"] = test_metrics | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 355 return metrics | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 356 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 357 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
def generate_table_row(cells, styles):
    """Render a single HTML ``<tr>`` element from a sequence of cell values.

    Each entry of ``cells`` becomes one ``<td>`` carrying the inline CSS
    given in ``styles``. Values are interpolated as-is (no HTML escaping).
    """
    rendered_cells = [f"<td style='{styles}'>{value}</td>" for value in cells]
    return "<tr>" + "".join(rendered_cells) + "</tr>"
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 365 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 366 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 367 # ----------------------------------------- | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 368 # 2) MODEL PERFORMANCE (Train/Val/Test) TABLE | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 369 # ----------------------------------------- | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
def format_stats_table_html(train_stats: dict, test_stats: dict, output_type: str) -> str:
    """Build the "Model Performance Summary" HTML table (Train/Validation/Test).

    Metrics come from ``extract_metrics_from_json``. A metric is listed only
    when it is present, and not ``None``, in all three splits; values are
    rendered with four decimal places. When no metric qualifies, a small
    placeholder table is returned instead.
    """
    all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type)

    table_rows = []
    for name in sorted(all_metrics["training"]):
        # A metric must be reported for every split to get a row.
        if name not in all_metrics["validation"]:
            continue
        if name not in all_metrics["test"]:
            continue

        label = METRIC_DISPLAY_NAMES.get(name, name.replace("_", " ").title())
        train_value = all_metrics["training"].get(name)
        val_value = all_metrics["validation"].get(name)
        test_value = all_metrics["test"].get(name)
        if train_value is None or val_value is None or test_value is None:
            continue

        table_rows.append(
            [label, f"{train_value:.4f}", f"{val_value:.4f}", f"{test_value:.4f}"]
        )

    if not table_rows:
        return "<table><tr><td>No metric values found.</td></tr></table>"

    cell_style = "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    header = (
        "<h2 style='text-align: center;'>Model Performance Summary</h2>"
        "<div style='display: flex; justify-content: center;'>"
        "<table class='performance-summary' style='border-collapse: collapse;'>"
        "<thead><tr>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Train</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Validation</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Test</th>"
        "</tr></thead><tbody>"
    )
    body = "".join(generate_table_row(cells, cell_style) for cells in table_rows)
    return header + body + "</tbody></table></div><br>"
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 410 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 411 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 412 # ------------------------------------------- | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 413 # 3) TRAIN/VALIDATION PERFORMANCE SUMMARY TABLE | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 414 # ------------------------------------------- | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str:
    """Build the "Train/Validation Performance Summary" HTML table.

    The output type is obtained from ``detect_output_type(test_stats)`` and
    metrics from ``extract_metrics_from_json``. A metric is listed only when
    it is present, and not ``None``, in both the training and validation
    splits; values are rendered with four decimal places. When no metric
    qualifies, a small placeholder table is returned instead.
    """
    all_metrics = extract_metrics_from_json(
        train_stats, test_stats, detect_output_type(test_stats)
    )

    table_rows = []
    for name in sorted(all_metrics["training"]):
        if name not in all_metrics["validation"]:
            continue

        label = METRIC_DISPLAY_NAMES.get(name, name.replace("_", " ").title())
        train_value = all_metrics["training"].get(name)
        val_value = all_metrics["validation"].get(name)
        if train_value is None or val_value is None:
            continue

        table_rows.append([label, f"{train_value:.4f}", f"{val_value:.4f}"])

    if not table_rows:
        return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>"

    cell_style = "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    header = (
        "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>"
        "<div style='display: flex; justify-content: center;'>"
        "<table class='performance-summary' style='border-collapse: collapse;'>"
        "<thead><tr>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Train</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Validation</th>"
        "</tr></thead><tbody>"
    )
    body = "".join(generate_table_row(cells, cell_style) for cells in table_rows)
    return header + body + "</tbody></table></div><br>"
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 450 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 451 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 452 # ----------------------------------------- | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 453 # 4) TEST‐ONLY PERFORMANCE SUMMARY TABLE | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 454 # ----------------------------------------- | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
def format_test_merged_stats_table_html(
    test_metrics: Dict[str, Any], output_type: str
) -> str:
    """Render the test-split metrics as a centered two-column HTML table.

    Args:
        test_metrics: Mapping of metric key to its value on the test split.
            Entries whose value is ``None`` are left out of the table.
        output_type: Not used by this function; kept so the signature stays
            compatible with existing callers.

    Returns:
        An HTML fragment (heading plus table) with one row per metric,
        ordered by metric key, or a one-cell placeholder table when no
        metric has a value.
    """
    cell_style = (
        "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    )

    rows = []
    for key in sorted(test_metrics):
        value = test_metrics[key]
        if value is None:
            continue
        display_name = METRIC_DISPLAY_NAMES.get(key, key.replace("_", " ").title())
        try:
            formatted = f"{value:.4f}"
        except (TypeError, ValueError):
            # The mapping is typed Dict[str, Any]; a non-numeric value
            # (string, list, dict, ...) cannot take the "f" format code.
            # Show it verbatim instead of aborting the whole report.
            formatted = str(value)
        rows.append([display_name, formatted])

    if not rows:
        return "<table><tr><td>No test metric values found.</td></tr></table>"

    header = (
        "<h2 style='text-align: center;'>Test Performance Summary</h2>"
        "<div style='display: flex; justify-content: center;'>"
        "<table class='performance-summary' style='border-collapse: collapse;'>"
        "<thead><tr>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>"
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Test</th>"
        "</tr></thead><tbody>"
    )
    # Build the body in one pass rather than growing a string with "+=".
    body = "".join(generate_table_row(row, cell_style) for row in rows)
    return header + body + "</tbody></table></div><br>"
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 485 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 486 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def split_data_0_2(
    df: pd.DataFrame,
    split_column: str,
    validation_size: float = 0.1,
    random_state: int = 42,
    label_column: Optional[str] = None,
) -> pd.DataFrame:
    """Carve a validation subset (split=1) out of the training rows (split=0).

    The input is expected to have a split column holding only 0 (train) and
    2 (test). A fraction of the 0-rows is re-labelled 1; all other rows are
    left untouched. The input frame is not modified.

    Args:
        df: Source data containing ``split_column``.
        split_column: Name of the column holding the split codes.
        validation_size: Fraction of the train rows to move to validation.
            ``<= 0`` keeps every train row; ``>= 1`` moves all of them.
        random_state: Seed forwarded to ``train_test_split``.
        label_column: Optional column used to stratify the split. It is
            ignored when missing from ``df`` or when the train rows hold a
            single class.

    Returns:
        A copy of ``df`` whose ``split_column`` is integer-typed and now
        contains validation rows.

    Raises:
        ValueError: If ``split_column`` holds missing or non-numeric values,
            or if ``train_test_split`` cannot produce a split even without
            stratification.
    """
    out = df.copy()

    # errors="coerce" turns bad entries into NaN, which astype(int) then
    # rejects with an opaque cast error; report the real problem instead.
    numeric_split = pd.to_numeric(out[split_column], errors="coerce")
    if numeric_split.isna().any():
        raise ValueError(
            f"Column '{split_column}' contains missing or non-numeric values; "
            "expected only 0 (train) and 2 (test)."
        )
    out[split_column] = numeric_split.astype(int)

    # NOTE(review): rows are addressed by index label below, so this assumes
    # the index labels of df are unique - confirm against callers.
    idx_train = out.index[out[split_column] == 0].tolist()
    if not idx_train:
        logger.info("No rows with split=0; nothing to do.")
        return out

    if validation_size <= 0:
        logger.info("validation_size <= 0; keeping all as train.")
        return out
    if validation_size >= 1:
        logger.info("validation_size >= 1; moving all train → validation.")
        out.loc[idx_train, split_column] = 1
        return out

    stratify_arr = None
    if label_column and label_column in out.columns:
        train_labels = out.loc[idx_train, label_column]
        label_counts = train_labels.value_counts()
        if label_counts.size > 1:
            smallest_class = label_counts.min()
            # The previous code tried to "adjust" validation_size here via
            # min(validation_size, 1 / smallest_class); inside this branch
            # that expression always equals validation_size, so it never
            # changed anything. The requested size is kept and the
            # situation is surfaced instead.
            if smallest_class * validation_size < 1:
                logger.warning(
                    f"Smallest class has {smallest_class} training sample(s); "
                    f"with validation_size={validation_size:.3f} the validation "
                    "set may not contain every class"
                )
            stratify_arr = train_labels
            logger.info("Using stratified split for validation set")
        else:
            logger.warning("Only one label class found; cannot stratify")

    try:
        train_idx, val_idx = train_test_split(
            idx_train,
            test_size=validation_size,
            random_state=random_state,
            stratify=stratify_arr,
        )
    except ValueError as e:
        if stratify_arr is None:
            # No stratification was requested, so repeating the identical
            # call cannot succeed; let the original error propagate.
            raise
        logger.warning(f"Stratified split failed ({e}); falling back to random split.")
        train_idx, val_idx = train_test_split(
            idx_train,
            test_size=validation_size,
            random_state=random_state,
            stratify=None,
        )
    else:
        if stratify_arr is not None:
            logger.info("Successfully applied stratified split")
        else:
            logger.info("Applied random split (no stratification)")

    out.loc[train_idx, split_column] = 0
    out.loc[val_idx, split_column] = 1
    out[split_column] = out[split_column].astype(int)
    return out
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 551 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 552 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 553 def create_stratified_random_split( | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 554 df: pd.DataFrame, | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 555 split_column: str, | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 556 split_probabilities: list = [0.7, 0.1, 0.2], | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 557 random_state: int = 42, | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 558 label_column: Optional[str] = None, | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 559 ) -> pd.DataFrame: | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 560 """Create a stratified random split when no split column exists.""" | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 561 out = df.copy() | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 562 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 563 # initialize split column | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 564 out[split_column] = 0 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 565 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 566 if not label_column or label_column not in out.columns: | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 567 logger.warning( | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 568 "No label column found; using random split without stratification" | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 569 ) | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 570 # fall back to simple random assignment | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 571 indices = out.index.tolist() | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 572 np.random.seed(random_state) | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 573 np.random.shuffle(indices) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 574 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 575 n_total = len(indices) | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 576 n_train = int(n_total * split_probabilities[0]) | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 577 n_val = int(n_total * split_probabilities[1]) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 578 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 579 out.loc[indices[:n_train], split_column] = 0 | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 580 out.loc[indices[n_train:n_train + n_val], split_column] = 1 | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 581 out.loc[indices[n_train + n_val:], split_column] = 2 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 582 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 583 return out.astype({split_column: int}) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 584 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 585 # check if stratification is possible | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 586 label_counts = out[label_column].value_counts() | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 587 min_samples_per_class = label_counts.min() | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 588 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 589 # ensure we have enough samples for stratification: | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 590 # Each class must have at least as many samples as the number of splits, | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 591 # so that each split can receive at least one sample per class. | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 592 min_samples_required = len(split_probabilities) | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 593 if min_samples_per_class < min_samples_required: | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 594 logger.warning( | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 595 f"Insufficient samples per class for stratification (min: {min_samples_per_class}, required: {min_samples_required}); using random split" | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 596 ) | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 597 # fall back to simple random assignment | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 598 indices = out.index.tolist() | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 599 np.random.seed(random_state) | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 600 np.random.shuffle(indices) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 601 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 602 n_total = len(indices) | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 603 n_train = int(n_total * split_probabilities[0]) | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 604 n_val = int(n_total * split_probabilities[1]) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 605 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 606 out.loc[indices[:n_train], split_column] = 0 | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 607 out.loc[indices[n_train:n_train + n_val], split_column] = 1 | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 608 out.loc[indices[n_train + n_val:], split_column] = 2 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 609 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 610 return out.astype({split_column: int}) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 611 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 612 logger.info("Using stratified random split for train/validation/test sets") | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 613 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 614 # first split: separate test set | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 615 train_val_idx, test_idx = train_test_split( | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 616 out.index.tolist(), | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 617 test_size=split_probabilities[2], | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 618 random_state=random_state, | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 619 stratify=out[label_column], | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 620 ) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 621 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 622 # second split: separate training and validation from remaining data | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 623 val_size_adjusted = split_probabilities[1] / ( | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 624 split_probabilities[0] + split_probabilities[1] | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 625 ) | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 626 train_idx, val_idx = train_test_split( | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 627 train_val_idx, | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 628 test_size=val_size_adjusted, | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 629 random_state=random_state, | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 630 stratify=out.loc[train_val_idx, label_column] if label_column and label_column in out.columns else None, | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 631 ) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 632 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 633 # assign split values | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 634 out.loc[train_idx, split_column] = 0 | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 635 out.loc[val_idx, split_column] = 1 | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 636 out.loc[test_idx, split_column] = 2 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 637 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 638 logger.info("Successfully applied stratified random split") | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 639 logger.info( | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 640 f"Split counts: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}" | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 641 ) | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 642 return out.astype({split_column: int}) | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 643 | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 644 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
class Backend(Protocol):
    """Structural interface that a machine learning backend has to satisfy.

    Any object providing these four methods with compatible signatures can be
    used as a backend; the stubs below carry no behavior of their own.
    """

    def prepare_config(self, config_params: Dict[str, Any], split_config: Dict[str, Any]) -> str:
        """Turn the parameter and split dictionaries into a configuration string."""

    def run_experiment(
        self, dataset_path: Path, config_path: Path, output_dir: Path, random_seed: int
    ) -> None:
        """Run an experiment for the given dataset, config file, output directory and seed."""

    def generate_plots(self, output_dir: Path) -> None:
        """Produce plots for the results associated with ``output_dir``."""

    def generate_html_report(
        self, title: str, output_dir: str, config: Dict[str, Any], split_info: str
    ) -> Path:
        """Build an HTML report and return its path.

        NOTE(review): ``output_dir`` is typed ``str`` here while the other
        methods take ``Path`` -- kept as-is to preserve the interface.
        """
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 675 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 676 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 677 class LudwigDirectBackend: | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 678 """Backend for running Ludwig experiments directly via the internal experiment_cli function.""" | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 679 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
def _detect_image_dimensions(self, image_zip_path: str) -> Tuple[int, int]:
    """Detect image dimensions from the first usable image in a ZIP archive.

    Args:
        image_zip_path: Path to the ZIP archive holding the images. A falsy
            value (``None`` or ``""``) means no archive was provided.

    Returns:
        ``(height, width)`` of the first PNG/JPEG entry in the archive.
        Falls back to ``(224, 224)`` when no archive is given, when it holds
        no matching image entries, or when anything fails while reading it
        (detection is best-effort and never raises).
    """
    default_size = (224, 224)
    try:
        # Imported inside the ``try`` so that a missing Pillow installation
        # degrades to the default size instead of failing.
        import io
        import zipfile

        from PIL import Image

        if not image_zip_path:
            logger.warning("No image zip provided, using default 224x224")
            return default_size

        with zipfile.ZipFile(image_zip_path, 'r') as z:
            image_files = [
                name
                for name in z.namelist()
                if name.lower().endswith(('.png', '.jpg', '.jpeg'))
                # Archives created on macOS carry AppleDouble metadata such as
                # "__MACOSX/._img.png": it matches the extension filter but is
                # not a decodable image, so it must not be picked as "first".
                and not name.startswith('__MACOSX/')
                and not name.rsplit('/', 1)[-1].startswith('._')
            ]
            if not image_files:
                logger.warning("No image files found in zip, using default 224x224")
                return default_size

            with z.open(image_files[0]) as f:
                image_bytes = f.read()

        # The context manager releases the decoder as soon as the size is read.
        with Image.open(io.BytesIO(image_bytes)) as img:
            width, height = img.size

        logger.info(f"Detected image dimensions: {width}x{height}")
        return height, width  # (height, width) to match encoder config

    except Exception as e:
        logger.warning(f"Error detecting image dimensions: {e}, using default 224x224")
        return default_size
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 709 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 710 def prepare_config( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 711 self, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 712 config_params: Dict[str, Any], | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 713 split_config: Dict[str, Any], | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 714 ) -> str: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 715 logger.info("LudwigDirectBackend: Preparing YAML configuration.") | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 716 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 717 model_name = config_params.get("model_name", "resnet18") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 718 use_pretrained = config_params.get("use_pretrained", False) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 719 fine_tune = config_params.get("fine_tune", False) | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 720 if use_pretrained: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 721 trainable = bool(fine_tune) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 722 else: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 723 trainable = True | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 724 epochs = config_params.get("epochs", 10) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 725 batch_size = config_params.get("batch_size") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 726 num_processes = config_params.get("preprocessing_num_processes", 1) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 727 early_stop = config_params.get("early_stop", None) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 728 learning_rate = config_params.get("learning_rate") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 729 learning_rate = "auto" if learning_rate is None else float(learning_rate) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 730 raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 731 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 732 # --- MetaFormer detection and config logic --- | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 733 def _is_metaformer(name: str) -> bool: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 734 return isinstance(name, str) and name.startswith( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 735 ( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 736 "identityformer_", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 737 "randformer_", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 738 "poolformerv2_", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 739 "convformer_", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 740 "caformer_", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 741 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 742 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 743 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 744 # Check if this is a MetaFormer model (either direct name or in custom_model) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 745 is_metaformer = ( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 746 _is_metaformer(model_name) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 747 or (isinstance(raw_encoder, dict) and "custom_model" in raw_encoder and _is_metaformer(raw_encoder["custom_model"])) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 748 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 749 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 750 metaformer_resize: Optional[Tuple[int, int]] = None | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 751 metaformer_channels = 3 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 752 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 753 if is_metaformer: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 754 # Handle MetaFormer models | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 755 custom_model = None | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 756 if isinstance(raw_encoder, dict) and "custom_model" in raw_encoder: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 757 custom_model = raw_encoder["custom_model"] | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 758 else: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 759 custom_model = model_name | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 760 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 761 logger.info(f"DETECTED MetaFormer model: {custom_model}") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 762 cfg_channels, cfg_height, cfg_width = 3, 224, 224 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 763 if META_DEFAULT_CFGS: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 764 model_cfg = META_DEFAULT_CFGS.get(custom_model, {}) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 765 input_size = model_cfg.get("input_size") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 766 if isinstance(input_size, (list, tuple)) and len(input_size) == 3: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 767 cfg_channels, cfg_height, cfg_width = ( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 768 int(input_size[0]), | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 769 int(input_size[1]), | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 770 int(input_size[2]), | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 771 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 772 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 773 target_height, target_width = cfg_height, cfg_width | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 774 resize_value = config_params.get("image_resize") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 775 if resize_value and resize_value != "original": | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 776 try: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 777 dimensions = resize_value.split("x") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 778 if len(dimensions) == 2: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 779 target_height, target_width = int(dimensions[0]), int(dimensions[1]) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 780 if target_height <= 0 or target_width <= 0: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 781 raise ValueError( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 782 f"Image resize must be positive integers, received {resize_value}." | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 783 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 784 logger.info(f"MetaFormer explicit resize: {target_height}x{target_width}") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 785 else: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 786 raise ValueError(resize_value) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 787 except (ValueError, IndexError): | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 788 logger.warning( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 789 "Invalid image resize format '%s'; falling back to model default %sx%s", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 790 resize_value, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 791 cfg_height, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 792 cfg_width, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 793 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 794 target_height, target_width = cfg_height, cfg_width | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 795 else: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 796 image_zip_path = config_params.get("image_zip", "") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 797 detected_height, detected_width = self._detect_image_dimensions(image_zip_path) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 798 if use_pretrained: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 799 if (detected_height, detected_width) != (cfg_height, cfg_width): | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 800 logger.info( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 801 "MetaFormer pretrained weights expect %sx%s; resizing from detected %sx%s", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 802 cfg_height, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 803 cfg_width, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 804 detected_height, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 805 detected_width, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 806 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 807 else: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 808 target_height, target_width = detected_height, detected_width | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 809 if target_height <= 0 or target_width <= 0: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 810 raise ValueError( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 811 f"Invalid detected image dimensions for MetaFormer: {target_height}x{target_width}." | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 812 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 813 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 814 metaformer_channels = cfg_channels | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 815 metaformer_resize = (target_height, target_width) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 816 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 817 encoder_config = { | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 818 "type": "stacked_cnn", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 819 "height": target_height, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 820 "width": target_width, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 821 "num_channels": metaformer_channels, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 822 "output_size": 128, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 823 "use_pretrained": use_pretrained, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 824 "trainable": trainable, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 825 "custom_model": custom_model, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 826 } | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 827 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 828 elif isinstance(raw_encoder, dict): | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 829 # Handle image resize for regular encoders | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 830 # Note: Standard encoders like ResNet don't support height/width parameters | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 831 # Resize will be handled at the preprocessing level by Ludwig | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 832 if config_params.get("image_resize") and config_params["image_resize"] != "original": | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 833 logger.info(f"Resize requested: {config_params['image_resize']} for standard encoder. Resize will be handled at preprocessing level.") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 834 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 835 encoder_config = { | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 836 **raw_encoder, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 837 "use_pretrained": use_pretrained, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 838 "trainable": trainable, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 839 } | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 840 else: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 841 encoder_config = {"type": raw_encoder} | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 842 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 843 batch_size_cfg = batch_size or "auto" | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 844 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 845 label_column_path = config_params.get("label_column_data_path") | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 846 label_series = None | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 847 if label_column_path is not None and Path(label_column_path).exists(): | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 848 try: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 849 label_series = pd.read_csv(label_column_path)[LABEL_COLUMN_NAME] | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 850 except Exception as e: | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 851 logger.warning(f"Could not read label column for task detection: {e}") | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 852 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 853 if ( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 854 label_series is not None | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 855 and ptypes.is_numeric_dtype(label_series.dtype) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 856 and label_series.nunique() > 10 | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 857 ): | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 858 task_type = "regression" | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 859 else: | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 860 task_type = "classification" | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 861 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 862 config_params["task_type"] = task_type | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 863 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 864 image_feat: Dict[str, Any] = { | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 865 "name": IMAGE_PATH_COLUMN_NAME, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 866 "type": "image", | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 867 } | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 868 # Set preprocessing dimensions FIRST for MetaFormer models | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 869 if is_metaformer: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 870 if metaformer_resize is None: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 871 metaformer_resize = (224, 224) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 872 height, width = metaformer_resize | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 873 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 874 # CRITICAL: Set preprocessing dimensions FIRST for MetaFormer models | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 875 # This is essential for MetaFormer models to work properly | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 876 if "preprocessing" not in image_feat: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 877 image_feat["preprocessing"] = {} | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 878 image_feat["preprocessing"]["height"] = height | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 879 image_feat["preprocessing"]["width"] = width | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 880 # Use infer_image_dimensions=True to allow Ludwig to read images for validation | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 881 # but set explicit max dimensions to control the output size | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 882 image_feat["preprocessing"]["infer_image_dimensions"] = True | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 883 image_feat["preprocessing"]["infer_image_max_height"] = height | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 884 image_feat["preprocessing"]["infer_image_max_width"] = width | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 885 image_feat["preprocessing"]["num_channels"] = metaformer_channels | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 886 image_feat["preprocessing"]["resize_method"] = "interpolate" # Use interpolation for better quality | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 887 image_feat["preprocessing"]["standardize_image"] = "imagenet1k" # Use ImageNet standardization | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 888 # Force Ludwig to respect our dimensions by setting additional parameters | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 889 image_feat["preprocessing"]["requires_equal_dimensions"] = False | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 890 logger.info(f"Set preprocessing dimensions for MetaFormer: {height}x{width} (infer_dimensions=True with max dimensions to allow validation)") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 891 # Now set the encoder configuration | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 892 image_feat["encoder"] = encoder_config | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 893 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 894 if config_params.get("augmentation") is not None: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 895 image_feat["augmentation"] = config_params["augmentation"] | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 896 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 897 # Add resize configuration for standard encoders (ResNet, etc.) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 898 # FIXED: MetaFormer models now respect user dimensions completely | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 899 # Previously there was a double resize issue where MetaFormer would force 224x224 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 900 # Now both MetaFormer and standard encoders respect user's resize choice | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 901 if (not is_metaformer) and config_params.get("image_resize") and config_params["image_resize"] != "original": | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 902 try: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 903 dimensions = config_params["image_resize"].split("x") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 904 if len(dimensions) == 2: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 905 height, width = int(dimensions[0]), int(dimensions[1]) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 906 if height <= 0 or width <= 0: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 907 raise ValueError( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 908 f"Image resize must be positive integers, received {config_params['image_resize']}." | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 909 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 910 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 911 # Add resize to preprocessing for standard encoders | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 912 if "preprocessing" not in image_feat: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 913 image_feat["preprocessing"] = {} | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 914 image_feat["preprocessing"]["height"] = height | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 915 image_feat["preprocessing"]["width"] = width | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 916 # Use infer_image_dimensions=True to allow Ludwig to read images for validation | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 917 # but set explicit max dimensions to control the output size | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 918 image_feat["preprocessing"]["infer_image_dimensions"] = True | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 919 image_feat["preprocessing"]["infer_image_max_height"] = height | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 920 image_feat["preprocessing"]["infer_image_max_width"] = width | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 921 logger.info(f"Added resize preprocessing: {height}x{width} for standard encoder with infer_image_dimensions=True and max dimensions") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 922 except (ValueError, IndexError): | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 923 logger.warning(f"Invalid image resize format: {config_params['image_resize']}, skipping resize preprocessing") | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 924 if task_type == "regression": | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 925 output_feat = { | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 926 "name": LABEL_COLUMN_NAME, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 927 "type": "number", | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 928 "decoder": {"type": "regressor", "input_size": 1}, | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 929 "loss": {"type": "mean_squared_error"}, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 930 "evaluation": { | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 931 "metrics": [ | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 932 "mean_squared_error", | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 933 "mean_absolute_error", | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 934 "r2", | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 935 ] | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 936 }, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 937 } | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 938 val_metric = config_params.get("validation_metric", "mean_squared_error") | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 939 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 940 else: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 941 num_unique_labels = ( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 942 label_series.nunique() if label_series is not None else 2 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 943 ) | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 944 output_type = "binary" if num_unique_labels == 2 else "category" | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 945 # Determine if this is regression or classification based on label type | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 946 is_regression = ( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 947 label_series is not None | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 948 and ptypes.is_numeric_dtype(label_series.dtype) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 949 and label_series.nunique() > 10 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 950 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 951 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 952 if is_regression: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 953 output_feat = { | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 954 "name": LABEL_COLUMN_NAME, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 955 "type": "number", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 956 "decoder": {"type": "regressor", "input_size": 1}, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 957 "loss": {"type": "mean_squared_error"}, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 958 } | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 959 else: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 960 if num_unique_labels == 2: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 961 output_feat = { | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 962 "name": LABEL_COLUMN_NAME, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 963 "type": "binary", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 964 "decoder": {"type": "classifier", "input_size": 1}, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 965 "loss": {"type": "softmax_cross_entropy"}, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 966 } | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 967 else: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 968 output_feat = { | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 969 "name": LABEL_COLUMN_NAME, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 970 "type": "category", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 971 "decoder": {"type": "classifier", "input_size": num_unique_labels}, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 972 "loss": {"type": "softmax_cross_entropy"}, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 973 } | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 974 if output_type == "binary" and config_params.get("threshold") is not None: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 975 output_feat["threshold"] = float(config_params["threshold"]) | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 976 val_metric = None | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 977 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 978 conf: Dict[str, Any] = { | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 979 "model_type": "ecd", | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 980 "input_features": [image_feat], | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 981 "output_features": [output_feat], | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 982 "combiner": {"type": "concat"}, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 983 "trainer": { | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 984 "epochs": epochs, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 985 "early_stop": early_stop, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 986 "batch_size": batch_size_cfg, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 987 "learning_rate": learning_rate, | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 988 # only set validation_metric for regression | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 989 **({"validation_metric": val_metric} if val_metric else {}), | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 990 }, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 991 "preprocessing": { | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 992 "split": split_config, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 993 "num_processes": num_processes, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 994 "in_memory": False, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 995 }, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 996 } | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 997 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 998 logger.debug("LudwigDirectBackend: Config dict built.") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 999 try: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1000 yaml_str = yaml.dump(conf, sort_keys=False, indent=2) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1001 logger.info("LudwigDirectBackend: YAML config generated.") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1002 return yaml_str | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1003 except Exception: | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1004 logger.error( | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1005 "LudwigDirectBackend: Failed to serialize YAML.", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1006 exc_info=True, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1007 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1008 raise | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1009 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def run_experiment(
    self,
    dataset_path: Path,
    config_path: Path,
    output_dir: Path,
    random_seed: int = 42,
) -> None:
    """Run a Ludwig experiment by calling ``experiment_cli`` in-process.

    Args:
        dataset_path: Dataset location; passed to Ludwig as a string.
        config_path: Ludwig config location; passed to Ludwig as a string.
        output_dir: Directory for results; created (with parents) if missing.
        random_seed: Seed forwarded to ``experiment_cli``.

    Raises:
        RuntimeError: If ``ludwig.experiment`` cannot be imported, or if the
            ``experiment_cli`` call raises ``TypeError``.
        Exception: Any other failure during the run is logged and re-raised
            unchanged.
    """
    logger.info("LudwigDirectBackend: Starting experiment execution.")

    # Ludwig is imported lazily so a missing install surfaces here as a
    # RuntimeError rather than at module import time.
    try:
        from ludwig.experiment import experiment_cli
    except ImportError as import_err:
        logger.error(
            "LudwigDirectBackend: Could not import experiment_cli.",
            exc_info=True,
        )
        raise RuntimeError("Ludwig import failed.") from import_err

    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        # Path arguments are stringified before being handed to Ludwig;
        # skip_preprocessing=True is forwarded as-is.
        cli_kwargs = {
            "dataset": str(dataset_path),
            "config": str(config_path),
            "output_directory": str(output_dir),
            "random_seed": random_seed,
            "skip_preprocessing": True,
        }
        experiment_cli(**cli_kwargs)
        logger.info(
            f"LudwigDirectBackend: Experiment completed. Results in {output_dir}"
        )
    except TypeError as type_err:
        # A TypeError is reported as a call-signature mismatch and wrapped.
        logger.error(
            "LudwigDirectBackend: Argument mismatch in experiment_cli call.",
            exc_info=True,
        )
        raise RuntimeError("Ludwig argument error.") from type_err
    except Exception:
        # Everything else is logged with its traceback and propagated as-is.
        logger.error(
            "LudwigDirectBackend: Experiment execution error.",
            exc_info=True,
        )
        raise
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1054 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]:
    """Return key training settings recorded by the most recent Ludwig run.

    Searches ``output_dir`` for directories matching ``experiment_run*``,
    selects the one with the newest modification time and reads
    ``model/training_progress.json`` from it.

    Args:
        output_dir: Directory (``str`` or ``Path``) containing the
            experiment run folders.

    Returns:
        A dict with the keys ``learning_rate``, ``batch_size`` and ``epoch``
        (a value is ``None`` when the key is absent from the file).
        ``None`` when no run directory or no progress file is found.
        An empty dict when the progress file exists but cannot be read or
        parsed; the failure is logged as a warning, never raised.
    """
    output_dir = Path(output_dir)

    # Only directories can be experiment runs. Without this filter a stray
    # file whose name matches the pattern (and is newer than the real run
    # directory) would be selected as the "latest run".
    exp_dirs = sorted(
        (p for p in output_dir.glob("experiment_run*") if p.is_dir()),
        key=lambda p: p.stat().st_mtime,
    )

    if not exp_dirs:
        logger.warning(f"No experiment run directories found in {output_dir}")
        return None

    # The list is sorted oldest -> newest, so the last entry is the latest run.
    progress_file = exp_dirs[-1] / "model" / "training_progress.json"
    if not progress_file.exists():
        logger.warning(f"No training_progress.json found in {progress_file}")
        return None

    try:
        with progress_file.open("r", encoding="utf-8") as f:
            data = json.load(f)
        return {
            "learning_rate": data.get("learning_rate"),
            "batch_size": data.get("batch_size"),
            "epoch": data.get("epoch"),
        }
    except Exception as e:
        # Deliberately best-effort: unreadable files, malformed JSON and
        # non-object payloads all end up here and must not abort the caller.
        logger.warning(f"Failed to read training progress info: {e}")
        return {}
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1083 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def convert_parquet_to_csv(self, output_dir: Path):
    """Write a CSV copy of the latest run's predictions Parquet file.

    Selects the most recently modified ``experiment_run*`` directory under
    ``output_dir`` and, if it contains the predictions Parquet file, writes
    its contents to ``predictions.csv`` in the same directory.

    The conversion is best-effort: a missing run directory or missing
    Parquet file is logged and skipped, and any error raised while reading
    or writing is logged rather than propagated.

    Args:
        output_dir: Directory (``str`` or ``Path``) containing the
            experiment run folders.
    """
    output_dir = Path(output_dir)

    # Only directories can be experiment runs. A matching non-directory
    # entry must not be mistaken for the latest run, otherwise the real
    # predictions file would never be found.
    exp_dirs = sorted(
        (p for p in output_dir.glob("experiment_run*") if p.is_dir()),
        key=lambda p: p.stat().st_mtime,
    )
    if not exp_dirs:
        logger.warning(f"No experiment run dirs found in {output_dir}")
        return

    # Sorted oldest -> newest, so the last entry is the latest run.
    exp_dir = exp_dirs[-1]
    parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
    csv_path = exp_dir / "predictions.csv"

    # Check if parquet file exists before trying to convert
    if not parquet_path.exists():
        logger.info(f"Predictions parquet file not found at {parquet_path}, skipping conversion")
        return

    try:
        df = pd.read_parquet(parquet_path)
        df.to_csv(csv_path, index=False)
        logger.info(f"Converted Parquet to CSV: {csv_path}")
    except Exception as e:
        # Best-effort step: report the failure but let the caller continue.
        logger.error(f"Error converting Parquet to CSV: {e}")
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1109 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def generate_plots(self, output_dir: Path) -> None:
    """Generate the supported Ludwig visualizations for the latest experiment run.

    Selects the most recently modified ``experiment_run*`` directory under
    ``output_dir``, creates ``visualizations/train`` and
    ``visualizations/test`` inside it, and calls every function from the
    visualization registry whose name appears in one of the two allow-lists
    below. Registry entries that are in neither list are skipped.

    Each visualization is attempted independently; one that raises is
    logged as skipped and does not stop the remaining ones.

    Args:
        output_dir: Directory (``str`` or ``Path``) containing the
            experiment run folders.
    """
    logger.info("Generating all Ludwig visualizations…")

    # Visualizations written to the "test" sub-folder.
    test_plots = {
        "compare_performance",
        "compare_classifiers_performance_from_prob",
        "compare_classifiers_performance_from_pred",
        "compare_classifiers_performance_changing_k",
        "compare_classifiers_multiclass_multimetric",
        "compare_classifiers_predictions",
        "confidence_thresholding_2thresholds_2d",
        "confidence_thresholding_2thresholds_3d",
        "confidence_thresholding",
        "confidence_thresholding_data_vs_acc",
        "binary_threshold_vs_metric",
        "roc_curves",
        "roc_curves_from_test_statistics",
        "calibration_1_vs_all",
        "calibration_multiclass",
        "confusion_matrix",
        "frequency_vs_f1",
    }
    # Visualizations written to the "train" sub-folder.
    train_plots = {
        "learning_curves",
        "compare_classifiers_performance_subset",
    }

    output_dir = Path(output_dir)
    # Only directories can be experiment runs; selecting a matching file
    # here would make the mkdir calls below fail.
    exp_dirs = sorted(
        (p for p in output_dir.glob("experiment_run*") if p.is_dir()),
        key=lambda p: p.stat().st_mtime,
    )
    if not exp_dirs:
        logger.warning(f"No experiment run dirs found in {output_dir}")
        return
    # Sorted oldest -> newest, so the last entry is the latest run.
    exp_dir = exp_dirs[-1]

    viz_dir = exp_dir / "visualizations"
    viz_dir.mkdir(exist_ok=True)
    train_viz = viz_dir / "train"
    test_viz = viz_dir / "test"
    train_viz.mkdir(parents=True, exist_ok=True)
    test_viz.mkdir(parents=True, exist_ok=True)

    def _check(p: Path) -> Optional[str]:
        """Return the path as a string if it exists, otherwise None."""
        return str(p) if p.exists() else None

    training_stats = _check(exp_dir / "training_statistics.json")
    test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME)
    probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME)
    gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME)

    dataset_path = None
    split_file = None
    output_feature = ""
    desc = exp_dir / DESCRIPTION_FILE_NAME
    if desc.exists():
        with open(desc, "r") as f:
            cfg = json.load(f)

        # Resolve the dataset only when a real path is recorded. An empty
        # value would become Path(""), i.e. the current directory, which
        # always exists and would be passed on as the ground-truth dataset.
        dataset = cfg.get("dataset")
        if isinstance(dataset, str) and dataset:
            dataset_path = _check(Path(dataset))
            split_file = _check(Path(get_split_path(dataset)))

        try:
            output_feature = cfg["config"]["output_features"][0]["name"]
        except (KeyError, IndexError, TypeError) as e:
            # Not fatal: the test statistics file is used as a fallback below.
            logger.debug(f"Could not read output feature name from {desc}: {e!r}")

    if not output_feature and test_stats:
        # Fall back to the first top-level key of the test statistics file.
        with open(test_stats, "r") as f:
            stats = json.load(f)
        output_feature = next(iter(stats.keys()), "")

    viz_registry = get_visualizations_registry()
    for viz_name, viz_func in viz_registry.items():
        if viz_name in train_plots:
            viz_dir_plot = train_viz
        elif viz_name in test_plots:
            viz_dir_plot = test_viz
        else:
            # Not in either allow-list: intentionally not generated.
            continue

        try:
            viz_func(
                training_statistics=[training_stats] if training_stats else [],
                test_statistics=[test_stats] if test_stats else [],
                probabilities=[probs_path] if probs_path else [],
                output_feature_name=output_feature,
                # NOTE(review): 2 presumably selects the test split; confirm
                # against the Ludwig visualization API.
                ground_truth_split=2,
                top_n_classes=[0],
                top_k=3,
                ground_truth_metadata=gt_metadata,
                ground_truth=dataset_path,
                split_file=split_file,
                output_directory=str(viz_dir_plot),
                normalize=False,
                file_format="png",
            )
            logger.info(f"✔ Generated {viz_name}")
        except Exception as e:
            # Every visualization receives the same keyword set, so some are
            # expected to fail for a given run; log and move on.
            logger.warning(f"✘ Skipped {viz_name}: {e}")

    logger.info(f"All visualizations written to {viz_dir}")
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1213 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1214 def generate_html_report( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1215 self, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1216 title: str, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1217 output_dir: str, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1218 config: dict, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1219 split_info: str, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1220 ) -> Path: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1221 """Assemble an HTML report from the newest experiment run's visualizations/train and visualizations/test folders.""" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1222 cwd = Path.cwd() | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1223 report_name = title.lower().replace(" ", "_") + "_report.html" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1224 report_path = cwd / report_name | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1225 output_dir = Path(output_dir) | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1226 output_type = None | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1227 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1228 exp_dirs = sorted( | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1229 output_dir.glob("experiment_run*"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1230 key=lambda p: p.stat().st_mtime, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1231 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1232 if not exp_dirs: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1233 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1234 exp_dir = exp_dirs[-1] | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1235 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1236 base_viz_dir = exp_dir / "visualizations" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1237 train_viz_dir = base_viz_dir / "train" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1238 test_viz_dir = base_viz_dir / "test" | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1239 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1240 html = get_html_template() | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1241 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1242 # Extra CSS & JS: center Plotly and enable CSV download for predictions table | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1243 html += """ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1244 <style> | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1245 /* Center Plotly figures (both wrapper and native classes) */ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1246 .plotly-center { display: flex; justify-content: center; } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1247 .plotly-center .plotly-graph-div, .plotly-center .js-plotly-plot { margin: 0 auto !important; } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1248 .js-plotly-plot, .plotly-graph-div { margin-left: auto !important; margin-right: auto !important; } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1249 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1250 /* Download button for predictions table */ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1251 .download-btn { | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1252 padding: 8px 12px; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1253 border: 1px solid #4CAF50; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1254 background: #4CAF50; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1255 color: white; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1256 border-radius: 6px; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1257 cursor: pointer; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1258 } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1259 .download-btn:hover { filter: brightness(0.95); } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1260 .preds-controls { | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1261 display: flex; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1262 justify-content: flex-end; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1263 gap: 8px; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1264 margin: 8px 0; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1265 } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1266 </style> | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1267 <script> | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1268 function tableToCSV(table){ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1269 const rows = Array.from(table.querySelectorAll('tr')); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1270 return rows.map(row => | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1271 Array.from(row.querySelectorAll('th,td')).map(cell => { | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1272 let text = cell.innerText.replace(/\\r?\\n|\\r/g,' ').trim(); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1273 if (text.includes('"') || text.includes(',')) { | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1274 text = '"' + text.replace(/"/g,'""') + '"'; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1275 } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1276 return text; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1277 }).join(',') | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1278 ).join('\\n'); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1279 } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1280 document.addEventListener('DOMContentLoaded', function(){ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1281 const btn = document.getElementById('downloadPredsCsv'); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1282 if(btn){ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1283 btn.addEventListener('click', function(){ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1284 const tbl = document.querySelector('.predictions-table'); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1285 if(!tbl){ alert('Predictions table not found.'); return; } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1286 const csv = tableToCSV(tbl); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1287 const blob = new Blob([csv], {type: 'text/csv;charset=utf-8;'}); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1288 const url = URL.createObjectURL(blob); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1289 const a = document.createElement('a'); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1290 a.href = url; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1291 a.download = 'ground_truth_vs_predictions.csv'; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1292 document.body.appendChild(a); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1293 a.click(); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1294 document.body.removeChild(a); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1295 URL.revokeObjectURL(url); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1296 }); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1297 } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1298 }); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1299 </script> | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1300 """ | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1301 html += f"<h1>{title}</h1>" | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1302 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1303 metrics_html = "" | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1304 train_val_metrics_html = "" | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1305 test_metrics_html = "" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1306 try: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1307 train_stats_path = exp_dir / "training_statistics.json" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1308 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1309 if train_stats_path.exists() and test_stats_path.exists(): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1310 with open(train_stats_path) as f: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1311 train_stats = json.load(f) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1312 with open(test_stats_path) as f: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1313 test_stats = json.load(f) | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1314 output_type = detect_output_type(test_stats) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1315 metrics_html = format_stats_table_html(train_stats, test_stats, output_type) | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1316 train_val_metrics_html = format_train_val_stats_table_html( | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1317 train_stats, test_stats | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1318 ) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1319 test_metrics_html = format_test_merged_stats_table_html( | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1320 extract_metrics_from_json(train_stats, test_stats, output_type)[ | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1321 "test" | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1322 ], output_type | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1323 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1324 except Exception as e: | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1325 logger.warning( | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1326 f"Could not load stats for HTML report: {type(e).__name__}: {e}" | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1327 ) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1328 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1329 config_html = "" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1330 training_progress = self.get_training_process(output_dir) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1331 try: | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1332 config_html = format_config_table_html( | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1333 config, split_info, training_progress, output_type | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1334 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1335 except Exception as e: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1336 logger.warning(f"Could not load config for HTML report: {e}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1337 | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1338 # ---------- image rendering with exclusions ---------- | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1339 def render_img_section( | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1340 title: str, | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1341 dir_path: Path, | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1342 output_type: str = None, | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1343 exclude_names: Optional[set] = None, | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1344 ) -> str: | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1345 if not dir_path.exists(): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1346 return f"<h2>{title}</h2><p><em>Directory not found.</em></p>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1347 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1348 exclude_names = exclude_names or set() | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1349 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1350 imgs = list(dir_path.glob("*.png")) | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1351 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1352 # Exclude standard confusion matrices and the top6/top10 entropy variants (keep only the entropy top5 version); ROC curves are currently kept | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1353 default_exclude = { | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1354 # "roc_curves.png", # Remove ROC curves from test tab | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1355 "confusion_matrix__label_top5.png", # Remove standard confusion matrix | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1356 "confusion_matrix__label_top10.png", # Remove duplicate | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1357 "confusion_matrix__label_top6.png", # Remove duplicate | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1358 "confusion_matrix_entropy__label_top10.png", # Keep only top5 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1359 "confusion_matrix_entropy__label_top6.png", # Keep only top5 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1360 } | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1361 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1362 imgs = [ | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1363 img | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1364 for img in imgs | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1365 if img.name not in default_exclude | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1366 and img.name not in exclude_names | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1367 ] | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1368 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1369 if not imgs: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1370 return f"<h2>{title}</h2><p><em>No plots found.</em></p>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1371 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1372 # Sort images by name for consistent ordering (works with string and numeric labels) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1373 imgs = sorted(imgs, key=lambda x: x.name) | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1374 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1375 html_section = "" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1376 for img in imgs: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1377 b64 = encode_image_to_base64(str(img)) | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1378 img_title = img.stem.replace("_", " ").title() | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1379 html_section += ( | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1380 f"<h2 style='text-align: center;'>{img_title}</h2>" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1381 f'<div class="plot" style="margin-bottom:20px;text-align:center;">' | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1382 f'<img src="data:image/png;base64,{b64}" ' | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1383 f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />' | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1384 f"</div>" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1385 ) | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1386 return html_section | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1387 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1388 tab1_content = config_html + metrics_html | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1389 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1390 tab2_content = train_val_metrics_html + render_img_section( | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1391 "Training and Validation Visualizations", | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1392 train_viz_dir, | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1393 output_type, | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1394 exclude_names={ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1395 "compare_classifiers_performance_from_prob.png", | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1396 "roc_curves_from_prediction_statistics.png", | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1397 "precision_recall_curves_from_prediction_statistics.png", | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1398 "precision_recall_curve.png", | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1399 }, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1400 ) | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1401 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1402 # --- Predictions vs Ground Truth table (REGRESSION ONLY) --- | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1403 preds_section = "" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1404 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1405 if output_type == "regression" and parquet_path.exists(): | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1406 try: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1407 # 1) load predictions from Parquet | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1408 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1409 # assume the column containing your model's prediction is named "prediction" | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1410 # or contains that substring: | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1411 pred_col = next( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1412 (c for c in df_preds.columns if "prediction" in c.lower()), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1413 None, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1414 ) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1415 if pred_col is None: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1416 raise ValueError("No prediction column found in Parquet output") | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1417 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"}) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1418 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1419 # 2) load ground truth for the test split from prepared CSV | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1420 df_all = pd.read_csv(config["label_column_data_path"]) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1421 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][ | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1422 LABEL_COLUMN_NAME | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1423 ].reset_index(drop=True) | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1424 # 3) concatenate side-by-side | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1425 df_table = pd.concat([df_gt, df_pred], axis=1) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1426 df_table.columns = [LABEL_COLUMN_NAME, "prediction"] | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1427 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1428 # 4) render as HTML | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1429 preds_html = df_table.to_html(index=False, classes="predictions-table") | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1430 preds_section = ( | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1431 "<h2 style='text-align: center;'>Ground Truth vs. Predictions</h2>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1432 "<div class='preds-controls'>" | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1433 "<button id='downloadPredsCsv' class='download-btn'>Download CSV</button>" | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1434 "</div>" | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1435 "<div class='scroll-rows-30' style='overflow-x:auto; overflow-y:auto; max-height:900px; margin-bottom:20px;'>" | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1436 + preds_html | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1437 + "</div>" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1438 ) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1439 except Exception as e: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1440 logger.warning(f"Could not build Predictions vs GT table: {e}") | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1441 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1442 tab3_content = test_metrics_html + preds_section | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1443 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1444 if output_type in ("binary", "category") and test_stats_path.exists(): | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1445 try: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1446 interactive_plots = build_classification_plots( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1447 str(test_stats_path), | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1448 str(train_stats_path) if train_stats_path.exists() else None, | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1449 ) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1450 for plot in interactive_plots: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1451 tab3_content += ( | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1452 f"<h2 style='text-align: center;'>{plot['title']}</h2>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1453 f"<div class='plotly-center'>{plot['html']}</div>" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1454 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1455 logger.info(f"Generated {len(interactive_plots)} interactive Plotly plots") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1456 except Exception as e: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1457 logger.warning(f"Could not generate Plotly plots: {e}") | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1458 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1459 # Add static TEST PNGs (with default dedupe/exclusions) | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1460 tab3_content += render_img_section( | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1461 "Test Visualizations", test_viz_dir, output_type | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1462 ) | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1463 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1464 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1465 modal_html = get_metrics_help_modal() | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1466 html += tabbed_html + modal_html + get_html_closing() | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1467 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1468 try: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1469 with open(report_path, "w") as f: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1470 f.write(html) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1471 logger.info(f"HTML report generated at: {report_path}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1472 except Exception as e: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1473 logger.error(f"Failed to write HTML report: {e}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1474 raise | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1475 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1476 return report_path | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1477 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1478 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
class WorkflowOrchestrator:
    """Thin coordinator that hands the whole workflow to a backend object.

    The instance keeps the parsed CLI arguments and the backend it was
    given. ``temp_dir`` and ``image_extract_dir`` are initialised to
    ``None``; no method of this class assigns them afterwards.
    """

    def __init__(self, args: argparse.Namespace, backend: Backend):
        """Remember the arguments and backend, then log the backend type.

        Args:
            args: Parsed command-line namespace; stored unchanged.
            backend: Object whose ``run_experiment()`` is invoked by ``run``.
        """
        # Collaborators supplied by the caller.
        self.backend = backend
        self.args = args

        # Placeholders for working directories; unset at construction time.
        self.image_extract_dir: Optional[Path] = None
        self.temp_dir: Optional[Path] = None

        logger.info(f"Orchestrator initialized with backend: {type(backend).__name__}")

    def run(self) -> None:
        """Run the workflow by calling ``run_experiment()`` on the backend."""
        # All of the actual work lives in the backend; this is a pass-through.
        self.backend.run_experiment()
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1493 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1494 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1495 class ImageLearnerCLI: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1496 """Manages the image-classification workflow.""" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1497 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
def __init__(self, args: argparse.Namespace, backend: Backend):
    """Store the CLI arguments and backend; working directories start unset.

    Args:
        args: Parsed command-line namespace; stored unchanged.
        backend: Backend object; stored unchanged and its type name is logged.
    """
    # Collaborators supplied by the caller.
    self.backend = backend
    self.args = args

    # Working directories are not created here; both start as None.
    self.image_extract_dir: Optional[Path] = None
    self.temp_dir: Optional[Path] = None

    # NOTE(review): the message below says "Orchestrator" although this is
    # ImageLearnerCLI; the text is kept as-is so log output is unchanged.
    logger.info(f"Orchestrator initialized with backend: {type(backend).__name__}")
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1504 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def _create_temp_dirs(self) -> None:
    """Create a fresh temp directory plus an ``images`` subdirectory in it.

    The temp directory is made with ``tempfile.mkdtemp`` inside
    ``self.args.output_dir`` using ``TEMP_DIR_PREFIX``. Its path is stored
    on ``self.temp_dir``, and ``<temp_dir>/images`` is stored on
    ``self.image_extract_dir`` and then created.

    Raises:
        Exception: Any error raised while creating the directories is
            logged with its traceback and re-raised unchanged.
    """
    try:
        work_root = Path(
            tempfile.mkdtemp(prefix=TEMP_DIR_PREFIX, dir=self.args.output_dir)
        )
        self.temp_dir = work_root

        # Record the images path on the instance before creating it, so the
        # attribute is set even if mkdir() fails.
        images_dir = work_root / "images"
        self.image_extract_dir = images_dir
        images_dir.mkdir()

        logger.info(f"Created temp directory: {self.temp_dir}")
    except Exception:
        # logger.exception logs at ERROR level with the active traceback.
        logger.exception("Failed to create temporary directories")
        raise
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1517 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def _extract_images(self) -> None:
    """Populate the temp image directory from ``args.image_zip``.

    The source may be either:

    - a directory, whose tree is mirrored file by file, or
    - a ZIP archive, which is unpacked into the temp image directory.

    Raises:
        RuntimeError: if the temp image directory has not been set up yet.
        Exception: any copy/extraction failure is logged and re-raised.
    """
    dest = self.image_extract_dir
    if dest is None:
        raise RuntimeError("Temp image directory not initialized.")

    src = Path(self.args.image_zip)
    logger.info(f"Preparing images from {src} → {self.image_extract_dir}")
    try:
        if not src.is_dir():
            # Anything that is not a directory is treated as a ZIP archive.
            with zipfile.ZipFile(src, "r") as archive:
                archive.extractall(dest)
            logger.info("Image extraction complete.")
            return

        # Mirror the directory tree under the temp image directory.
        for current, _subdirs, filenames in os.walk(src):
            current_path = Path(current)
            mirror = dest / current_path.relative_to(src)
            mirror.mkdir(parents=True, exist_ok=True)
            for filename in filenames:
                shutil.copy2(current_path / filename, mirror / filename)
        logger.info("Image directory copied.")
    except Exception:
        logger.error("Error preparing images", exc_info=True)
        raise
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1544 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
def _process_fixed_split(
    self, df: pd.DataFrame
) -> Tuple[pd.DataFrame, Dict[str, Any], str]:
    """Process datasets that already have a split column.

    Two layouts of the split column are accepted:

    - exactly ``{0, 2}``: ``split_data_0_2`` is called (with
      ``args.validation_size`` and ``args.random_seed``) to reassign part of
      the rows labeled 0 to validation (1);
    - any other subset of ``{0, 1, 2}``: the column is used unchanged.

    Args:
        df: Input data containing the split column.

    Returns:
        A tuple of the (possibly re-split) DataFrame, the split config dict,
        and a human-readable description of how the split was obtained.

    Raises:
        ValueError: if the split column holds a value outside ``{0, 1, 2}``.
    """
    unique = set(df[SPLIT_COLUMN_NAME].unique())
    if not unique.issubset({0, 1, 2}):
        # The second literal is a plain string (not an f-string), so braces
        # must NOT be doubled or they show up verbatim in the message.
        raise ValueError(
            f"Split column contains unexpected values: {unique}. "
            "Expected: {0,1,2} or {0,2}"
        )

    # Both accepted layouts end up with the same fixed-split configuration.
    split_config = {"type": "fixed", "column": SPLIT_COLUMN_NAME}

    if unique == {0, 2}:
        # Split 0/2 detected, create validation set
        df = split_data_0_2(
            df=df,
            split_column=SPLIT_COLUMN_NAME,
            validation_size=self.args.validation_size,
            random_state=self.args.random_seed,
            label_column=LABEL_COLUMN_NAME,
        )
        split_info = (
            "Detected a split column (with values 0 and 2) in the input CSV. "
            f"Used this column as a base and reassigned "
            f"{self.args.validation_size * 100:.1f}% "
            "of the training set (originally labeled 0) to validation (labeled 1) using stratified sampling."
        )
        logger.info("Applied custom 0/2 split.")
    else:
        # Standard 0/1/2 split
        split_info = (
            "Detected a split column with train(0)/validation(1)/test(2) "
            "values in the input CSV. Used this column as-is."
        )
        logger.info("Fixed split column detected.")

    return df, split_config, split_info
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1582 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
def _prepare_data(self) -> Tuple[Path, Dict[str, Any], str]:
    """Load CSV, update image paths, handle splits, and write prepared CSV.

    Steps:

    1. Read ``args.csv_file`` and check the image-path and label columns exist.
    2. Prefix every image path with ``images/``.
    3. Use the existing split column via ``_process_fixed_split`` or, when
       there is none, build one with ``create_stratified_random_split``.
    4. Write the result to ``TEMP_CSV_FILENAME`` inside the temp directory.

    Returns:
        A tuple of the prepared CSV path, the split config dict, and a
        human-readable description of how the split was obtained.

    Raises:
        RuntimeError: if the temp directories have not been initialized.
        ValueError: if a required column is missing from the CSV.
        Exception: I/O and path-rewrite failures are logged and re-raised.
    """
    if not self.temp_dir or not self.image_extract_dir:
        raise RuntimeError("Temp dirs not initialized before data prep.")

    try:
        df = pd.read_csv(self.args.csv_file)
        logger.info(f"Loaded CSV: {self.args.csv_file}")
    except Exception:
        logger.error("Error loading CSV file", exc_info=True)
        raise

    required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME}
    missing = required - set(df.columns)
    if missing:
        # Sort so the message is deterministic (set order is not).
        raise ValueError(f"Missing CSV columns: {', '.join(sorted(missing))}")

    try:
        # Use relative paths that Ludwig can resolve from its internal working directory
        df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply(
            lambda p: str(Path("images") / p)
        )
    except Exception:
        logger.error("Error updating image paths", exc_info=True)
        raise

    if SPLIT_COLUMN_NAME in df.columns:
        df, split_config, split_info = self._process_fixed_split(df)
    else:
        logger.info("No split column; creating stratified random split")
        df = create_stratified_random_split(
            df=df,
            split_column=SPLIT_COLUMN_NAME,
            split_probabilities=self.args.split_probabilities,
            random_state=self.args.random_seed,
            label_column=LABEL_COLUMN_NAME,
        )
        split_config = {
            "type": "fixed",
            "column": SPLIT_COLUMN_NAME,
        }
        # round() rather than int(): float products such as 0.29 * 100 come
        # out as 28.999..., which int() would misreport as 28%.
        percentages = [round(p * 100) for p in self.args.split_probabilities]
        split_info = (
            f"No split column in CSV. Created stratified random split: "
            f"{percentages}% "
            f"for train/val/test with balanced label distribution."
        )

    final_csv = self.temp_dir / TEMP_CSV_FILENAME

    try:
        df.to_csv(final_csv, index=False)
        logger.info(f"Saved prepared data to {final_csv}")
    except Exception:
        logger.error("Error saving prepared CSV", exc_info=True)
        raise

    return final_csv, split_config, split_info
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1641 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1642 # Removed duplicate method | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1643 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
def _detect_image_dimensions(self) -> Tuple[int, int]:
    """Detect image dimensions from the first image in the dataset.

    ``args.image_zip`` may point to a ZIP archive or to a directory of
    images; ``_extract_images`` accepts both, so both are handled here
    instead of silently falling back to the default for directories.

    Only ``.png``, ``.jpg`` and ``.jpeg`` files are considered. For a ZIP the
    first matching archive member is used; for a directory the first
    matching file in sorted path order is used.

    Returns:
        ``(height, width)`` of the first image found, or ``(224, 224)`` when
        no source is given, no image is found, or detection fails. This
        method never raises; failures are logged as warnings.
    """
    default_dims = (224, 224)
    image_exts = (".png", ".jpg", ".jpeg")
    try:
        import io

        from PIL import Image

        # Check if image_zip is provided
        if not self.args.image_zip:
            logger.warning("No image zip provided, using default 224x224")
            return default_dims

        src = Path(self.args.image_zip)
        if src.is_dir():
            # Directory input: sort for a deterministic "first" image.
            candidates = sorted(
                p
                for p in src.rglob("*")
                if p.is_file() and p.name.lower().endswith(image_exts)
            )
            if not candidates:
                logger.warning(
                    "No image files found in directory, using default 224x224"
                )
                return default_dims
            with Image.open(candidates[0]) as img:
                width, height = img.size
        else:
            # ZIP input: read the first image member without extracting it.
            with zipfile.ZipFile(src, "r") as z:
                image_files = [
                    f for f in z.namelist() if f.lower().endswith(image_exts)
                ]
                if not image_files:
                    logger.warning(
                        "No image files found in zip, using default 224x224"
                    )
                    return default_dims
                with z.open(image_files[0]) as f:
                    with Image.open(io.BytesIO(f.read())) as img:
                        width, height = img.size

        logger.info(f"Detected image dimensions: {width}x{height}")
        return height, width  # Return as (height, width) to match encoder config

    except Exception as e:
        # Best-effort detection: any failure falls back to the default size.
        logger.warning(f"Error detecting image dimensions: {e}, using default 224x224")
        return default_dims
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1673 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1674 def _cleanup_temp_dirs(self) -> None: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1675 if self.temp_dir and self.temp_dir.exists(): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1676 logger.info(f"Cleaning up temp directory: {self.temp_dir}") | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1677 # Don't clean up for debugging | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1678 shutil.rmtree(self.temp_dir, ignore_errors=True) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1679 self.temp_dir = None | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1680 self.image_extract_dir = None | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1681 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def run(self) -> None:
    """Run the whole pipeline: stage inputs, run the experiment, write outputs.

    The output directory is created first. Inside the guarded section the
    temp directories are created, images are extracted, the data is
    prepared, and the backend config is written into the temp directory.

    If the experiment runs without raising, plots, the HTML report and the
    predictions CSV are produced, followed by a disk-space cleanup whose
    own failure is only logged as a warning. If the experiment raises, the
    error is logged and minimal placeholder outputs plus an HTML report are
    written instead; an error while building those is logged and re-raised.

    Any exception escaping the guarded section is logged and re-raised.
    The temp directories are cleaned up in every case.
    """
    opts = self.args
    logger.info("Starting workflow...")
    opts.output_dir.mkdir(parents=True, exist_ok=True)

    try:
        # Stage the inputs.
        self._create_temp_dirs()
        self._extract_images()
        csv_path, split_cfg, split_info = self._prepare_data()

        # Settings handed to the backend; key order is kept as-is because
        # the same mapping is also passed to the report generator.
        backend_args = dict(
            model_name=opts.model_name,
            fine_tune=opts.fine_tune,
            # Requesting fine-tuning also turns on pretrained weights.
            use_pretrained=opts.use_pretrained or opts.fine_tune,
            epochs=opts.epochs,
            batch_size=opts.batch_size,
            preprocessing_num_processes=opts.preprocessing_num_processes,
            split_probabilities=opts.split_probabilities,
            learning_rate=opts.learning_rate,
            random_seed=opts.random_seed,
            early_stop=opts.early_stop,
            label_column_data_path=csv_path,
            augmentation=opts.augmentation,
            image_resize=opts.image_resize,
            image_zip=opts.image_zip,
            threshold=opts.threshold,
        )
        config_text = self.backend.prepare_config(backend_args, split_cfg)

        config_file = self.temp_dir / TEMP_CONFIG_FILENAME
        config_file.write_text(config_text)
        logger.info(f"Wrote backend config: {config_file}")

        # Both the normal and the fallback path build the same report.
        report_args = (
            "Image Classification Results",
            opts.output_dir,
            backend_args,
            split_info,
        )

        try:
            # Run Ludwig experiment with absolute paths to avoid working directory issues
            self.backend.run_experiment(
                csv_path,
                config_file,
                opts.output_dir,
                opts.random_seed,
            )
        except Exception:
            # Deliberately not re-raised: the fallback branch below takes over.
            logger.exception("Workflow execution failed")
            experiment_failed = True
        else:
            experiment_failed = False

        if experiment_failed:
            # Fallback: create minimal outputs so downstream steps can proceed
            logger.warning("Falling back to minimal outputs due to runtime failure.")
            try:
                self._create_minimal_outputs(opts.output_dir, csv_path)
                # Even in fallback, produce an HTML shell so tests find required text
                report_file = self.backend.generate_html_report(*report_args)
                logger.info(f"HTML report (fallback) generated at: {report_file}")
            except Exception as fb_err:
                logger.error(f"Failed to build fallback outputs: {fb_err}")
                raise
        else:
            logger.info("Workflow completed successfully.")
            # Generate a very small set of plots to conserve disk space
            self.backend.generate_plots(opts.output_dir)
            # Build HTML report (robust to missing metrics)
            report_file = self.backend.generate_html_report(*report_args)
            logger.info(f"HTML report generated at: {report_file}")
            # Convert predictions parquet → csv
            self.backend.convert_parquet_to_csv(opts.output_dir)
            logger.info("Converted Parquet to CSV.")
            # Post-process cleanup to reduce disk footprint for subsequent tests
            try:
                self._postprocess_cleanup(opts.output_dir)
            except Exception as cleanup_err:
                logger.warning(f"Cleanup step failed: {cleanup_err}")

    except Exception:
        logger.exception("Workflow execution failed")
        raise
    finally:
        self._cleanup_temp_dirs()
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1772 | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
def _postprocess_cleanup(self, output_dir: Path) -> None:
    """Remove large intermediates and caches to conserve disk space across tests.

    In the most recently modified ``experiment_run*`` entry under
    *output_dir*, the ``model/training_checkpoints`` tree and the
    predictions parquet file are deleted. Afterwards the torch hub and
    huggingface caches under ``<cwd>/home/.cache`` and the torch hub cache
    under the current user's home directory are removed.

    Deletions are best-effort: a missing path is not an error, directory
    removal ignores errors, and a parquet file that cannot be removed is
    logged as a warning instead of being silently ignored.

    Args:
        output_dir: Directory containing the ``experiment_run*`` folders.
    """
    output_dir = Path(output_dir)
    exp_dirs = sorted(
        output_dir.glob("experiment_run*"),
        key=lambda p: p.stat().st_mtime,
    )
    if exp_dirs:
        latest_run = exp_dirs[-1]  # newest by modification time

        # Remove training checkpoints directory if present.
        # rmtree(ignore_errors=True) already tolerates a missing path.
        shutil.rmtree(
            latest_run / "model" / "training_checkpoints", ignore_errors=True
        )

        # Remove predictions parquet once CSV is generated
        parquet_path = latest_run / PREDICTIONS_PARQUET_FILE_NAME
        try:
            parquet_path.unlink()
        except FileNotFoundError:
            pass  # nothing to delete
        except OSError as err:
            # Still non-fatal, but leave a trace rather than hiding it.
            logger.warning(f"Could not remove {parquet_path}: {err}")

    sandbox_cache = Path.cwd() / "home" / ".cache"
    cache_dirs = (
        # Torch hub cache under the job-scoped home, if present
        sandbox_cache / "torch" / "hub",
        # Default user cache as a best-effort (may not exist in job sandbox).
        # NOTE(review): outside a sandbox this deletes the real user's
        # torch hub cache — confirm that is intended.
        Path.home() / ".cache" / "torch" / "hub",
        # Huggingface cache, if present in the job sandbox
        sandbox_cache / "huggingface",
    )
    for cache_dir in cache_dirs:
        shutil.rmtree(cache_dir, ignore_errors=True)
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1808 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1809 def _create_minimal_outputs(self, output_dir: Path, prepared_csv_path: Path) -> None: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1810 """Create a minimal set of outputs so Galaxy can collect expected artifacts. | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1811 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1812 - experiment_run/ | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1813 - predictions.csv (1 column) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1814 - visualizations/train/ (empty) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1815 - visualizations/test/ (empty) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1816 - model/ | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1817 - model_weights/ (empty) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1818 - model_hyperparameters.json (stub) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1819 """ | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1820 output_dir = Path(output_dir) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1821 exp_dir = output_dir / "experiment_run" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1822 (exp_dir / "visualizations" / "train").mkdir(parents=True, exist_ok=True) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1823 (exp_dir / "visualizations" / "test").mkdir(parents=True, exist_ok=True) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1824 model_dir = exp_dir / "model" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1825 (model_dir / "model_weights").mkdir(parents=True, exist_ok=True) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1826 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1827 # Stub JSON so the tool's copy step succeeds | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1828 try: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1829 (model_dir / "model_hyperparameters.json").write_text("{}\n") | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1830 except Exception: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1831 pass | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1832 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1833 # Create a small predictions.csv with exactly 1 column | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1834 try: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1835 df_all = pd.read_csv(prepared_csv_path) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1836 from constants import SPLIT_COLUMN_NAME # local import to avoid cycle at top | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1837 num_rows = int((df_all[SPLIT_COLUMN_NAME] == 2).sum()) if SPLIT_COLUMN_NAME in df_all.columns else 1 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1838 except Exception: | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1839 num_rows = 1 | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1840 num_rows = max(1, num_rows) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1841 pd.DataFrame({"prediction": [0] * num_rows}).to_csv(exp_dir / "predictions.csv", index=False) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1842 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1843 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def parse_learning_rate(s):
    """Convert a learning-rate argument to ``float`` without raising.

    Returns the parsed float, or ``None`` when ``float(s)`` raises
    ``TypeError`` or ``ValueError`` (for example ``None``, an empty string,
    or non-numeric text).
    """
    try:
        rate = float(s)
    except (TypeError, ValueError):
        # Unparsable input is reported as "no value" instead of an error.
        rate = None
    return rate
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1849 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1850 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
def aug_parse(aug_string: str):
    """
    Parse comma-separated augmentation keys into Ludwig augmentation dicts.

    Tokens are stripped of surrounding whitespace and empty tokens are
    skipped. The result preserves the order of the input, and each entry is
    an independent dict, so a repeated key yields separate objects that can
    be modified without affecting one another.

    Returns an empty list when ``aug_string`` is ``None`` or empty.
    Raises ValueError on unknown key.
    """
    mapping = {
        "random_horizontal_flip": {"type": "random_horizontal_flip"},
        "random_vertical_flip": {"type": "random_vertical_flip"},
        "random_rotate": {"type": "random_rotate", "degree": 10},
        "random_blur": {"type": "random_blur", "kernel_size": 3},
        "random_brightness": {"type": "random_brightness", "min": 0.5, "max": 2.0},
        "random_contrast": {"type": "random_contrast", "min": 0.5, "max": 2.0},
    }
    # Nothing requested: same result the loop below gives for "".
    if not aug_string:
        return []

    aug_list = []
    for tok in aug_string.split(","):
        key = tok.strip()
        if not key:
            continue
        if key not in mapping:
            valid = ", ".join(mapping)
            raise ValueError(f"Unknown augmentation '{key}'. Valid choices: {valid}")
        # Copy so duplicate keys do not alias the same dict in the result.
        aug_list.append(dict(mapping[key]))
    return aug_list
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1874 | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1875 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
class SplitProbAction(argparse.Action):
    """argparse action that validates train/val/test split probabilities.

    Expects exactly three numeric values. Reports a parser error (which
    terminates argument parsing) when the values do not sum to 1.0 within a
    1e-6 tolerance, or when any single value lies outside ``[0.0, 1.0]``.
    On success the values are stored on the namespace under ``self.dest``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        train, val, test = values
        total = train + val + test
        if abs(total - 1.0) > 1e-6:
            parser.error(
                f"--split-probabilities must sum to 1.0; "
                f"got {train:.3f} + {val:.3f} + {test:.3f} = {total:.3f}"
            )
        # The sum check alone accepts e.g. 1.5 -0.3 -0.2, and NaN slips past
        # it because comparisons with NaN are False. The negated range test
        # rejects both.
        if not all(0.0 <= part <= 1.0 for part in (train, val, test)):
            parser.error(
                f"--split-probabilities values must each be between 0.0 and 1.0; "
                f"got {train} {val} {test}"
            )
        setattr(namespace, self.dest, values)
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1886 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1887 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1888 def main(): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1889 parser = argparse.ArgumentParser( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1890 description="Image Classification Learner with Pluggable Backends", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1891 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1892 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1893 "--csv-file", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1894 required=True, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1895 type=Path, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1896 help="Path to the input CSV", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1897 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1898 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1899 "--image-zip", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1900 required=True, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1901 type=Path, | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1902 help="Path to the images ZIP or a directory containing images", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1903 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1904 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1905 "--model-name", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1906 required=True, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1907 choices=MODEL_ENCODER_TEMPLATES.keys(), | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1908 help="Which model template to use", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1909 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1910 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1911 "--use-pretrained", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1912 action="store_true", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1913 help="Use pretrained weights for the model", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1914 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1915 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1916 "--fine-tune", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1917 action="store_true", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1918 help="Enable fine-tuning", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1919 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1920 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1921 "--epochs", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1922 type=int, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1923 default=10, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1924 help="Number of training epochs", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1925 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1926 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1927 "--early-stop", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1928 type=int, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1929 default=5, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1930 help="Early stopping patience", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1931 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1932 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1933 "--batch-size", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1934 type=int, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1935 help="Batch size (None = auto)", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1936 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1937 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1938 "--output-dir", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1939 type=Path, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1940 default=Path("learner_output"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1941 help="Where to write outputs", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1942 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1943 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1944 "--validation-size", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1945 type=float, | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1946 default=0.15, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1947 help="Fraction for validation (0.0–1.0)", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1948 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1949 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1950 "--preprocessing-num-processes", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1951 type=int, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1952 default=max(1, os.cpu_count() // 2), | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1953 help="CPU processes for data prep", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1954 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1955 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1956 "--split-probabilities", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1957 type=float, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1958 nargs=3, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1959 metavar=("train", "val", "test"), | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1960 action=SplitProbAction, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1961 default=[0.7, 0.1, 0.2], | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1962 help=( | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1963 "Random split proportions (e.g., 0.7 0.1 0.2).Only used if no split column." | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1964 ), | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1965 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1966 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1967 "--random-seed", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1968 type=int, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1969 default=42, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1970 help="Random seed used for dataset splitting (default: 42)", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1971 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1972 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1973 "--learning-rate", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1974 type=parse_learning_rate, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1975 default=None, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1976 help="Learning rate. If not provided, Ludwig will auto-select it.", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1977 ) | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1978 parser.add_argument( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1979 "--augmentation", | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1980 type=str, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1981 default=None, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1982 help=( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1983 "Comma-separated list (in order) of any of: " | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1984 "random_horizontal_flip, random_vertical_flip, random_rotate, " | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1985 "random_blur, random_brightness, random_contrast. " | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1986 "E.g. --augmentation random_horizontal_flip,random_rotate" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1987 ), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1988 ) | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1989 parser.add_argument( | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1990 "--image-resize", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1991 type=str, | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1992 choices=[ | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1993 "original", "96x96", "128x128", "160x160", "192x192", "220x220", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1994 "224x224", "256x256", "299x299", "320x320", "384x384", "448x448", "512x512" | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1995 ], | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1996 default="original", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1997 help="Image resize option. 'original' keeps images as-is, other options resize to specified dimensions.", | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1998 ) | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 1999 parser.add_argument( | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 2000 "--threshold", | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 2001 type=float, | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 2002 default=None, | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 2003 help=( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 2004 "Decision threshold for binary classification (0.0–1.0)." | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 2005 "Overrides default 0.5." | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 2006 ), | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 2007 ) | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 2008 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2009 args = parser.parse_args() | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 2010 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2011 if not 0.0 <= args.validation_size <= 1.0: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2012 parser.error("validation-size must be between 0.0 and 1.0") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2013 if not args.csv_file.is_file(): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2014 parser.error(f"CSV not found: {args.csv_file}") | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 2015 if not (args.image_zip.is_file() or args.image_zip.is_dir()): | 
| 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 2016 parser.error(f"ZIP or directory not found: {args.image_zip}") | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 2017 if args.augmentation is not None: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 2018 try: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 2019 augmentation_setup = aug_parse(args.augmentation) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 2020 setattr(args, "augmentation", augmentation_setup) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 2021 except ValueError as e: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 2022 parser.error(str(e)) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 2023 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2024 backend_instance = LudwigDirectBackend() | 
| 11 
c5150cceab47
planemo upload for repository https://github.com/goeckslab/gleam.git commit 0fe927b618cd4dfc87af7baaa827034cc6813225
 goeckslab parents: 
10diff
changeset | 2025 orchestrator = ImageLearnerCLI(args, backend_instance) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 2026 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2027 exit_code = 0 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2028 try: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2029 orchestrator.run() | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2030 logger.info("Main script finished successfully.") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2031 except Exception as e: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2032 logger.error(f"Main script failed.{e}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2033 exit_code = 1 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2034 finally: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2035 sys.exit(exit_code) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2036 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2037 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
if __name__ == "__main__":
    # Script entry point: confirm that Ludwig can be imported, then hand
    # control to main(). An ImportError raised inside the try block is
    # turned into a logged error and exit status 1.
    try:
        import ludwig

        # The installed version is only reported at DEBUG level.
        logger.debug(f"Found Ludwig version: {ludwig.globals.LUDWIG_VERSION}")
    except ImportError:
        logger.error(
            "Ludwig library not found. "
            "Please ensure Ludwig is installed "
            "('pip install ludwig[image]')"
        )
        # Same effect as sys.exit(1): terminate with a non-zero status.
        raise SystemExit(1)

    main()
