Mercurial > repos > goeckslab > image_learner
annotate image_learner_cli.py @ 10:b0d893d04d4c draft
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
| author | goeckslab | 
|---|---|
| date | Mon, 08 Sep 2025 22:38:35 +0000 | 
| parents | 9e912fce264c | 
| children | c5150cceab47 | 
| rev | line source | 
|---|---|
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1 import argparse | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 2 import json | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 3 import logging | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 4 import os | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 5 import shutil | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 6 import sys | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 7 import tempfile | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 8 import zipfile | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 9 from pathlib import Path | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 10 from typing import Any, Dict, Optional, Protocol, Tuple | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 11 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 12 import numpy as np | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 13 import pandas as pd | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 14 import pandas.api.types as ptypes | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 15 import yaml | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 16 from constants import ( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 17 IMAGE_PATH_COLUMN_NAME, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 18 LABEL_COLUMN_NAME, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 19 METRIC_DISPLAY_NAMES, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 20 MODEL_ENCODER_TEMPLATES, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 21 SPLIT_COLUMN_NAME, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 22 TEMP_CONFIG_FILENAME, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 23 TEMP_CSV_FILENAME, | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 24 TEMP_DIR_PREFIX, | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 25 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 26 from ludwig.globals import ( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 27 DESCRIPTION_FILE_NAME, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 28 PREDICTIONS_PARQUET_FILE_NAME, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 29 TEST_STATISTICS_FILE_NAME, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 30 TRAIN_SET_METADATA_FILE_NAME, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 31 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 32 from ludwig.utils.data_utils import get_split_path | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 33 from ludwig.visualize import get_visualizations_registry | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 34 from plotly_plots import build_classification_plots | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 35 from sklearn.model_selection import train_test_split | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 36 from utils import ( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 37 build_tabbed_html, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 38 encode_image_to_base64, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 39 get_html_closing, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 40 get_html_template, | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 41 get_metrics_help_modal, | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 42 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 43 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 44 # --- Logging Setup --- | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 45 logging.basicConfig( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 46 level=logging.INFO, | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 47 format="%(asctime)s %(levelname)s %(name)s: %(message)s", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 48 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 49 logger = logging.getLogger("ImageLearner") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 50 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 51 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 52 def format_config_table_html( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 53 config: dict, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 54 split_info: Optional[str] = None, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 55 training_progress: dict = None, | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 56 output_type: Optional[str] = None, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 57 ) -> str: | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 58 display_keys = [ | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 59 "task_type", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 60 "model_name", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 61 "epochs", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 62 "batch_size", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 63 "fine_tune", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 64 "use_pretrained", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 65 "learning_rate", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 66 "random_seed", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 67 "early_stop", | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 68 "threshold", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 69 ] | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 70 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 71 rows = [] | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 72 for key in display_keys: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 73 val = config.get(key, None) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 74 if key == "threshold": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 75 if output_type != "binary": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 76 continue | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 77 val = val if val is not None else 0.5 | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 78 val_str = f"{val:.2f}" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 79 if val == 0.5: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 80 val_str += " (default)" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 81 else: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 82 if key == "task_type": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 83 val_str = val.title() if isinstance(val, str) else "N/A" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 84 elif key == "batch_size": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 85 if val is not None: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 86 val_str = int(val) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 87 else: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 88 if training_progress: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 89 resolved_val = training_progress.get("batch_size") | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 90 val_str = ( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 91 "Auto-selected batch size by Ludwig:<br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 92 f"<span style='font-size: 0.85em;'>{resolved_val}</span><br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 93 ) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 94 else: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 95 val_str = "auto" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 96 elif key == "learning_rate": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 97 if val is not None and val != "auto": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 98 val_str = f"{val:.6f}" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 99 else: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 100 if training_progress: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 101 resolved_val = training_progress.get("learning_rate") | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 102 val_str = ( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 103 "Auto-selected learning rate by Ludwig:<br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 104 f"<span style='font-size: 0.85em;'>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 105 f"{resolved_val if resolved_val else 'auto'}</span><br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 106 "<span style='font-size: 0.85em;'>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 107 "Based on model architecture and training setup " | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 108 "(e.g., fine-tuning).<br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 109 "</span>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 110 ) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 111 else: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 112 val_str = ( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 113 "Auto-selected by Ludwig<br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 114 "<span style='font-size: 0.85em;'>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 115 "Automatically tuned based on architecture and dataset.<br>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 116 "See <a href='https://ludwig.ai/latest/configuration/trainer/" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 117 "#trainer-parameters' target='_blank'>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 118 "Ludwig Trainer Parameters</a> for details." | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 119 "</span>" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 120 ) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 121 elif key == "epochs": | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 122 if val is None: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 123 val_str = "N/A" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 124 else: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 125 if ( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 126 training_progress | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 127 and "epoch" in training_progress | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 128 and val > training_progress["epoch"] | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 129 ): | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 130 val_str = ( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 131 f"Because of early stopping: the training " | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 132 f"stopped at epoch {training_progress['epoch']}" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 133 ) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 134 else: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 135 val_str = val | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 136 else: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 137 val_str = val if val is not None else "N/A" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 138 if val_str == "N/A" and key not in ["task_type"]: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 139 continue | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 140 rows.append( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 141 f"<tr>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 142 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 143 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 144 f"{key.replace('_', ' ').title()}</td>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 145 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 146 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>" | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 147 f"{val_str}</td>" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 148 f"</tr>" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 149 ) | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 150 aug_cfg = config.get("augmentation") | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 151 if aug_cfg: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 152 types = [str(a.get("type", "")) for a in aug_cfg] | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 153 aug_val = ", ".join(types) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 154 rows.append( | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 155 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 156 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Augmentation</td>" | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 157 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 158 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{aug_val}</td></tr>" | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 159 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 160 if split_info: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 161 rows.append( | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 162 f"<tr><td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 163 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>Data Split</td>" | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 164 f"<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; " | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 165 f"white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>{split_info}</td></tr>" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 166 ) | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 167 html = f""" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 168 <h2 style="text-align: center;">Model and Training Summary</h2> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 169 <div style="display: flex; justify-content: center;"> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 170 <table style="border-collapse: collapse; width: 100%; table-layout: fixed;"> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 171 <thead><tr> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 172 <th style="padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Parameter</th> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 173 <th style="padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Value</th> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 174 </tr></thead> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 175 <tbody> | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 176 {"".join(rows)} | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 177 </tbody> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 178 </table> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 179 </div><br> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 180 <p style="text-align: center; font-size: 0.9em;"> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 181 Model trained using <a href="https://ludwig.ai/" target="_blank" rel="noopener noreferrer">Ludwig</a>. | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 182 <a href="https://ludwig.ai/latest/configuration/" target="_blank" rel="noopener noreferrer"> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 183 Ludwig documentation provides detailed information about default model and training parameters | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 184 </a> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 185 </p><hr> | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 186 """ | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 187 return html | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 188 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 189 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
def detect_output_type(test_stats):
    """Infer the task type from the test statistics.

    Args:
        test_stats: Mapping of test statistics; only the entry stored under
            ``"label"`` is inspected. ``None``, or a missing/null ``"label"``
            entry, is treated as having no statistics.

    Returns:
        ``"regression"`` when the label statistics contain a
        ``"mean_squared_error"`` key, ``"binary"`` when ``"per_class_stats"``
        holds exactly two entries, and ``"category"`` otherwise.
    """
    # `or {}` also covers keys that are present but explicitly null.
    label_stats = (test_stats or {}).get("label") or {}
    if "mean_squared_error" in label_stats:
        return "regression"
    per_class = label_stats.get("per_class_stats") or {}
    # Exactly two per-class entries means binary; anything else falls back
    # to "category".
    if len(per_class) == 2:
        return "binary"
    return "category"
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 199 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 200 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
def extract_metrics_from_json(
    train_stats: dict,
    test_stats: dict,
    output_type: str,
) -> dict:
    """Collect summary metrics for the training, validation and test splits.

    Args:
        train_stats: Mapping with optional ``"training"`` and ``"validation"``
            entries, each holding a ``"label"`` mapping of metric name to
            either a list of values or a single number.
        test_stats: Mapping with a ``"label"`` entry of test metrics and an
            optional ``"combined"`` entry.
        output_type: ``"binary"`` or ``"regression"``; any other value selects
            the category metric set.

    Returns:
        ``{"training": {...}, "validation": {...}, "test": {...}}``. For the
        training and validation splits each expected metric maps to the last
        value of its list (or the number itself), or ``None`` when it is
        missing or empty. A split without statistics keeps an empty dict.
        The test entry holds every scalar label statistic that is not
        excluded, plus the flattened ``"overall_stats"`` entries, plus the
        combined loss when no ``"loss"`` was collected from the label stats.
    """
    # Metric names per output type; tuple order is the key order of the
    # resulting per-split dicts.
    metric_names_by_type = {
        "binary": (
            "accuracy",
            "loss",
            "precision",
            "recall",
            "specificity",
            "roc_auc",
        ),
        "regression": (
            "loss",
            "mean_absolute_error",
            "mean_absolute_percentage_error",
            "mean_squared_error",
            "root_mean_squared_error",
            "root_mean_squared_percentage_error",
            "r2",
        ),
    }
    # Used for any output type that is neither binary nor regression.
    category_metric_names = (
        "accuracy",
        "accuracy_micro",
        "loss",
        "roc_auc",
        "hits_at_k",
    )

    metrics = {"training": {}, "validation": {}, "test": {}}

    def get_last_value(stats, key):
        """Return the last element of a non-empty list, a bare number, or None."""
        val = stats.get(key)
        if isinstance(val, list) and val:
            return val[-1]
        if isinstance(val, (int, float)):
            return val
        return None

    metric_names = metric_names_by_type.get(output_type, category_metric_names)
    for split in ("training", "validation"):
        split_stats = train_stats.get(split) or {}
        if not split_stats:
            logging.warning("No statistics found for %s split", split)
            continue
        label_stats = split_stats.get("label") or {}
        if not label_stats:
            logging.warning("No label statistics found for %s split", split)
            continue
        metrics[split] = {
            name: get_last_value(label_stats, name) for name in metric_names
        }

    # Test metrics: dynamic extraction according to exclusions
    test_label_stats = test_stats.get("label") or {}
    if not test_label_stats:
        logging.warning("No label statistics found for test split")
        return metrics

    # `or {}` guards against entries that are present but null, which would
    # otherwise break the membership test / `.items()` call below.
    combined_stats = test_stats.get("combined") or {}
    overall_stats = test_label_stats.get("overall_stats") or {}

    # "overall_stats" is skipped in the scalar pass because it is flattened
    # into the result separately.
    if output_type == "binary":
        exclude = {
            "per_class_stats",
            "precision_recall_curve",
            "roc_curve",
            "overall_stats",
        }
    else:
        exclude = {"per_class_stats", "confusion_matrix", "overall_stats"}

    # 1. Scalar label statistics that are not excluded.
    test_metrics = {
        k: v
        for k, v in test_label_stats.items()
        if k not in exclude and isinstance(v, (int, float, str, bool))
    }

    # 2. Flattened overall_stats; these override same-named scalar entries.
    test_metrics.update(overall_stats)

    # 3. Fall back to the combined loss when no loss was collected above.
    if "loss" in combined_stats and "loss" not in test_metrics:
        test_metrics["loss"] = combined_stats["loss"]

    metrics["test"] = test_metrics
    return metrics
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 295 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 296 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
def generate_table_row(cells, styles):
    """Render ``cells`` as one HTML ``<tr>`` string.

    Every cell is wrapped in a ``<td>`` whose ``style`` attribute is
    ``styles``; cell values are interpolated as-is, without escaping.
    """
    pieces = ["<tr>"]
    for value in cells:
        pieces.append(f"<td style='{styles}'>{value}</td>")
    pieces.append("</tr>")
    return "".join(pieces)
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 304 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 305 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 306 # ----------------------------------------- | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 307 # 2) MODEL PERFORMANCE (Train/Val/Test) TABLE | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 308 # ----------------------------------------- | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 309 | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 310 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
def format_stats_table_html(train_stats: dict, test_stats: dict) -> str:
    """Build the combined Train / Validation / Test metrics table as HTML.

    The output type is detected from ``test_stats`` and handed to
    ``extract_metrics_from_json`` together with both stats dicts. The result
    is indexed by the keys ``"training"``, ``"validation"`` and ``"test"``.

    A metric becomes a row only when it is present in all three splits and
    none of its three values is ``None``. Rows are ordered by metric key and
    values are rendered with four decimal places.

    Args:
        train_stats: Training statistics passed through to the extractor.
        test_stats: Test statistics; also used to detect the output type.

    Returns:
        HTML for a heading plus a centered table, or a one-cell placeholder
        table when no metric qualifies.
    """
    metrics_by_split = extract_metrics_from_json(
        train_stats, test_stats, detect_output_type(test_stats)
    )

    table_rows = []
    for name in sorted(metrics_by_split["training"]):
        # Only metrics reported for every split can fill a complete row.
        if name not in metrics_by_split["validation"]:
            continue
        if name not in metrics_by_split["test"]:
            continue
        label = METRIC_DISPLAY_NAMES.get(name, name.replace("_", " ").title())
        values = [
            metrics_by_split[split].get(name)
            for split in ("training", "validation", "test")
        ]
        if any(value is None for value in values):
            continue
        table_rows.append([label] + [f"{value:.4f}" for value in values])

    if not table_rows:
        return "<table><tr><td>No metric values found.</td></tr></table>"

    cell_style = "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    fragments = [
        "<h2 style='text-align: center;'>Model Performance Summary</h2>",
        "<div style='display: flex; justify-content: center;'>",
        "<table class='performance-summary' style='border-collapse: collapse;'>",
        "<thead><tr>",
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>",
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Train</th>",
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Validation</th>",
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Test</th>",
        "</tr></thead><tbody>",
    ]
    fragments.extend(generate_table_row(row, cell_style) for row in table_rows)
    fragments.append("</tbody></table></div><br>")
    return "".join(fragments)
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 352 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 353 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 354 # ------------------------------------------- | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 355 # 3) TRAIN/VALIDATION PERFORMANCE SUMMARY TABLE | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 356 # ------------------------------------------- | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 357 | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 358 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str:
    """Build the Train / Validation metrics table as HTML.

    The output type is detected from ``test_stats`` and handed to
    ``extract_metrics_from_json`` together with both stats dicts. Only the
    ``"training"`` and ``"validation"`` entries of the result are read.

    A metric becomes a row only when it is present in both splits and
    neither value is ``None``. Rows are ordered by metric key and values are
    rendered with four decimal places.

    Args:
        train_stats: Training statistics passed through to the extractor.
        test_stats: Test statistics; used here to detect the output type and
            as an input to the extractor.

    Returns:
        HTML for a heading plus a centered table, or a one-cell placeholder
        table when no metric qualifies.
    """
    metrics_by_split = extract_metrics_from_json(
        train_stats, test_stats, detect_output_type(test_stats)
    )

    table_rows = []
    for name in sorted(metrics_by_split["training"]):
        # Skip metrics that were never reported for the validation split.
        if name not in metrics_by_split["validation"]:
            continue
        label = METRIC_DISPLAY_NAMES.get(name, name.replace("_", " ").title())
        train_value = metrics_by_split["training"].get(name)
        val_value = metrics_by_split["validation"].get(name)
        if train_value is None or val_value is None:
            continue
        table_rows.append([label, f"{train_value:.4f}", f"{val_value:.4f}"])

    if not table_rows:
        return "<table><tr><td>No metric values found for Train/Validation.</td></tr></table>"

    cell_style = "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    fragments = [
        "<h2 style='text-align: center;'>Train/Validation Performance Summary</h2>",
        "<div style='display: flex; justify-content: center;'>",
        "<table class='performance-summary' style='border-collapse: collapse;'>",
        "<thead><tr>",
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>",
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Train</th>",
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Validation</th>",
        "</tr></thead><tbody>",
    ]
    fragments.extend(generate_table_row(row, cell_style) for row in table_rows)
    fragments.append("</tbody></table></div><br>")
    return "".join(fragments)
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 395 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 396 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 397 # ----------------------------------------- | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 398 # 4) TEST‐ONLY PERFORMANCE SUMMARY TABLE | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 399 # ----------------------------------------- | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 400 | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 401 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
def format_test_merged_stats_table_html(
    test_metrics: Dict[str, Optional[float]],
) -> str:
    """Build the Test-only metrics table as HTML.

    Metrics are listed in key order. Entries whose value is ``None`` are
    left out; the rest are rendered with four decimal places next to their
    display name (looked up in ``METRIC_DISPLAY_NAMES``, falling back to the
    key with underscores replaced by spaces and title-cased).

    Args:
        test_metrics: Mapping of metric key to its test value, or ``None``
            when the metric has no value.

    Returns:
        HTML for a heading plus a centered two-column table, or a one-cell
        placeholder table when every value is ``None`` or the mapping is
        empty.
    """
    table_rows = []
    for name in sorted(test_metrics):
        label = METRIC_DISPLAY_NAMES.get(name, name.replace("_", " ").title())
        metric_value = test_metrics[name]
        if metric_value is None:
            continue
        table_rows.append([label, f"{metric_value:.4f}"])

    if not table_rows:
        return "<table><tr><td>No test metric values found.</td></tr></table>"

    cell_style = "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;"
    fragments = [
        "<h2 style='text-align: center;'>Test Performance Summary</h2>",
        "<div style='display: flex; justify-content: center;'>",
        "<table class='performance-summary' style='border-collapse: collapse;'>",
        "<thead><tr>",
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap;'>Metric</th>",
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;'>Test</th>",
        "</tr></thead><tbody>",
    ]
    fragments.extend(generate_table_row(row, cell_style) for row in table_rows)
    fragments.append("</tbody></table></div><br>")
    return "".join(fragments)
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 432 | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 433 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 434 def split_data_0_2( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 435 df: pd.DataFrame, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 436 split_column: str, | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 437 validation_size: float = 0.1, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 438 random_state: int = 42, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 439 label_column: Optional[str] = None, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 440 ) -> pd.DataFrame: | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 441 """Given a DataFrame whose split_column only contains {0,2}, re-assign a portion of the 0s to become 1s (validation).""" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 442 out = df.copy() | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 443 out[split_column] = pd.to_numeric(out[split_column], errors="coerce").astype(int) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 444 idx_train = out.index[out[split_column] == 0].tolist() | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 445 if not idx_train: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 446 logger.info("No rows with split=0; nothing to do.") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 447 return out | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 448 # Always use stratify if possible | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 449 stratify_arr = None | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 450 if label_column and label_column in out.columns: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 451 label_counts = out.loc[idx_train, label_column].value_counts() | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 452 if label_counts.size > 1: | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 453 # Force stratify even with fewer samples - adjust validation_size if needed | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 454 min_samples_per_class = label_counts.min() | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 455 if min_samples_per_class * validation_size < 1: | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 456 # Adjust validation_size to ensure at least 1 sample per class, but do not exceed original validation_size | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 457 adjusted_validation_size = min( | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 458 validation_size, 1.0 / min_samples_per_class | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 459 ) | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 460 if adjusted_validation_size != validation_size: | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 461 validation_size = adjusted_validation_size | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 462 logger.info( | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 463 f"Adjusted validation_size to {validation_size:.3f} to ensure at least one sample per class in validation" | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 464 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 465 stratify_arr = out.loc[idx_train, label_column] | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 466 logger.info("Using stratified split for validation set") | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 467 else: | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 468 logger.warning("Only one label class found; cannot stratify") | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 469 if validation_size <= 0: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 470 logger.info("validation_size <= 0; keeping all as train.") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 471 return out | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 472 if validation_size >= 1: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 473 logger.info("validation_size >= 1; moving all train → validation.") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 474 out.loc[idx_train, split_column] = 1 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 475 return out | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 476 # Always try stratified split first | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 477 try: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 478 train_idx, val_idx = train_test_split( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 479 idx_train, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 480 test_size=validation_size, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 481 random_state=random_state, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 482 stratify=stratify_arr, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 483 ) | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 484 logger.info("Successfully applied stratified split") | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 485 except ValueError as e: | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 486 logger.warning(f"Stratified split failed ({e}); falling back to random split.") | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 487 train_idx, val_idx = train_test_split( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 488 idx_train, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 489 test_size=validation_size, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 490 random_state=random_state, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 491 stratify=None, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 492 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 493 out.loc[train_idx, split_column] = 0 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 494 out.loc[val_idx, split_column] = 1 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 495 out[split_column] = out[split_column].astype(int) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 496 return out | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 497 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 498 | 
| 7 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
def _assign_random_split(
    out: pd.DataFrame,
    split_column: str,
    split_probabilities: list,
    random_state: int,
) -> pd.DataFrame:
    """Assign split codes 0/1/2 by shuffling the index, without stratification.

    The first ``int(n * split_probabilities[0])`` shuffled rows get 0, the next
    ``int(n * split_probabilities[1])`` rows get 1, and every remaining row
    gets 2 (so rounding leftovers land in split 2).
    """
    indices = out.index.tolist()
    # A private RandomState produces the same permutation as seeding the global
    # NumPy RNG with ``random_state``, but does not reseed the process-wide RNG
    # as a hidden side effect of computing a split.
    np.random.RandomState(random_state).shuffle(indices)
    n_total = len(indices)
    n_train = int(n_total * split_probabilities[0])
    n_val = int(n_total * split_probabilities[1])
    out.loc[indices[:n_train], split_column] = 0
    out.loc[indices[n_train:n_train + n_val], split_column] = 1
    out.loc[indices[n_train + n_val:], split_column] = 2
    return out.astype({split_column: int})


def create_stratified_random_split(
    df: pd.DataFrame,
    split_column: str,
    split_probabilities: list = [0.7, 0.1, 0.2],  # read-only; never mutated here
    random_state: int = 42,
    label_column: Optional[str] = None,
) -> pd.DataFrame:
    """Create a stratified random split when no split column exists.

    A copy of ``df`` is returned with ``split_column`` set to 0 (train),
    1 (validation) or 2 (test). The input frame is not modified.

    Args:
        df: Input data.
        split_column: Name of the column that receives the split codes.
        split_probabilities: Train / validation / test proportions, in that
            order.
        random_state: Seed used for shuffling and for ``train_test_split``.
        label_column: Column to stratify on. When it is missing, when the
            rarest class has fewer rows than there are splits, or when
            ``train_test_split`` rejects the stratified request, a plain
            random split is used instead.

    Returns:
        The copied frame with an integer ``split_column``.
    """
    out = df.copy()
    # initialize split column
    out[split_column] = 0

    if not label_column or label_column not in out.columns:
        logger.warning(
            "No label column found; using random split without stratification"
        )
        return _assign_random_split(
            out, split_column, split_probabilities, random_state
        )

    # Each class needs at least as many samples as there are splits so that
    # every split can receive at least one sample per class.
    label_counts = out[label_column].value_counts()
    min_samples_per_class = label_counts.min()
    min_samples_required = len(split_probabilities)
    if min_samples_per_class < min_samples_required:
        logger.warning(
            f"Insufficient samples per class for stratification (min: {min_samples_per_class}, required: {min_samples_required}); using random split"
        )
        return _assign_random_split(
            out, split_column, split_probabilities, random_state
        )

    logger.info("Using stratified random split for train/validation/test sets")
    try:
        # first split: separate test set
        train_val_idx, test_idx = train_test_split(
            out.index.tolist(),
            test_size=split_probabilities[2],
            random_state=random_state,
            stratify=out[label_column],
        )
        # second split: validation share is re-expressed relative to the
        # remaining (train + validation) rows
        val_size_adjusted = split_probabilities[1] / (
            split_probabilities[0] + split_probabilities[1]
        )
        train_idx, val_idx = train_test_split(
            train_val_idx,
            test_size=val_size_adjusted,
            random_state=random_state,
            stratify=out.loc[train_val_idx, label_column],
        )
    except ValueError as e:
        # The per-class minimum above does not guarantee feasibility: e.g. a
        # test share smaller than the number of classes makes train_test_split
        # raise. Nothing has been assigned yet, so degrade to a random split
        # instead of aborting.
        logger.warning(
            f"Stratified random split failed ({e}); falling back to random split."
        )
        return _assign_random_split(
            out, split_column, split_probabilities, random_state
        )

    # assign split values
    out.loc[train_idx, split_column] = 0
    out.loc[val_idx, split_column] = 1
    out.loc[test_idx, split_column] = 2
    logger.info("Successfully applied stratified random split")
    logger.info(
        f"Split counts: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}"
    )
    return out.astype({split_column: int})
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 574 | 
| 
801a8b6973fb
planemo upload for repository https://github.com/goeckslab/gleam.git commit 67df782ea551181e1d240d463764016ba528eba9
 goeckslab parents: 
6diff
changeset | 575 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
class Backend(Protocol):
    """Structural interface that a machine learning backend must satisfy."""

    def prepare_config(self, config_params: Dict[str, Any], split_config: Dict[str, Any]) -> str:
        """Build a configuration from ``config_params`` and ``split_config``; returns a string."""

    def run_experiment(self, dataset_path: Path, config_path: Path, output_dir: Path, random_seed: int) -> None:
        """Run an experiment given dataset/config paths, an output directory and a seed."""

    def generate_plots(self, output_dir: Path) -> None:
        """Generate plots; takes the output directory as its only argument."""

    def generate_html_report(self, title: str, output_dir: str, config: Dict[str, Any], split_info: str) -> Path:
        """Generate an HTML report; the return annotation is a ``Path``."""
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 606 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 607 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 608 class LudwigDirectBackend: | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 609 """Backend for running Ludwig experiments directly via the internal experiment_cli function.""" | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 610 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def prepare_config(
    self,
    config_params: Dict[str, Any],
    split_config: Dict[str, Any],
) -> str:
    """Assemble the Ludwig config from user parameters and return it as YAML.

    Keys read from ``config_params`` (defaults in parentheses):
    ``model_name`` ("resnet18"), ``use_pretrained`` (False), ``fine_tune``
    (False), ``epochs`` (10), ``batch_size``, ``preprocessing_num_processes``
    (1), ``early_stop``, ``learning_rate``, ``label_column_data_path``,
    ``augmentation``, ``validation_metric`` and ``threshold``.

    Side effect: the detected task type ("regression" or "classification")
    is written back to ``config_params["task_type"]``.

    Args:
        config_params: User-supplied training parameters (mutated, see above).
        split_config: Placed verbatim under ``preprocessing.split``.

    Returns:
        The configuration serialized with ``yaml.dump``.

    Raises:
        Exception: Whatever ``yaml.dump`` raises is logged and re-raised.
    """
    logger.info("LudwigDirectBackend: Preparing YAML configuration.")

    # Pull the scalar settings out of the parameter mapping first.
    model_name = config_params.get("model_name", "resnet18")
    use_pretrained = config_params.get("use_pretrained", False)
    fine_tune = config_params.get("fine_tune", False)
    # Pretrained weights are only updated when fine-tuning is requested;
    # a non-pretrained encoder is always trainable.
    trainable = bool(fine_tune) if use_pretrained else True
    epochs = config_params.get("epochs", 10)
    batch_size = config_params.get("batch_size")
    num_processes = config_params.get("preprocessing_num_processes", 1)
    early_stop = config_params.get("early_stop", None)

    learning_rate = config_params.get("learning_rate")
    if learning_rate is None:
        learning_rate = "auto"
    else:
        learning_rate = float(learning_rate)

    # Known model names map to an encoder template dict; anything else is
    # used directly as the encoder type.
    raw_encoder = MODEL_ENCODER_TEMPLATES.get(model_name, model_name)
    if isinstance(raw_encoder, dict):
        encoder_config = dict(raw_encoder)
        encoder_config.update(
            {"use_pretrained": use_pretrained, "trainable": trainable}
        )
    else:
        encoder_config = {"type": raw_encoder}

    # A missing or falsy batch size falls back to "auto".
    batch_size_cfg = batch_size if batch_size else "auto"

    # Best effort: load the label column so the task type can be inferred.
    label_series = None
    label_column_path = config_params.get("label_column_data_path")
    if label_column_path is not None and Path(label_column_path).exists():
        try:
            label_frame = pd.read_csv(label_column_path)
            label_series = label_frame[LABEL_COLUMN_NAME]
        except Exception as exc:
            logger.warning(f"Could not read label column for task detection: {exc}")

    # Numeric labels with more than 10 distinct values are treated as a
    # regression target; everything else is classification.
    is_regression = False
    if label_series is not None:
        if ptypes.is_numeric_dtype(label_series.dtype):
            is_regression = label_series.nunique() > 10
    task_type = "regression" if is_regression else "classification"

    config_params["task_type"] = task_type

    image_feat: Dict[str, Any] = {
        "name": IMAGE_PATH_COLUMN_NAME,
        "type": "image",
        "encoder": encoder_config,
    }
    augmentation = config_params.get("augmentation")
    if augmentation is not None:
        image_feat["augmentation"] = config_params["augmentation"]

    if task_type == "regression":
        regression_metrics = [
            "mean_squared_error",
            "mean_absolute_error",
            "r2",
        ]
        output_feat = {
            "name": LABEL_COLUMN_NAME,
            "type": "number",
            "decoder": {"type": "regressor"},
            "loss": {"type": "mean_squared_error"},
            "evaluation": {"metrics": regression_metrics},
        }
        val_metric = config_params.get("validation_metric", "mean_squared_error")
    else:
        # Without readable labels, assume a two-class problem.
        if label_series is not None:
            num_unique_labels = label_series.nunique()
        else:
            num_unique_labels = 2
        output_type = "binary" if num_unique_labels == 2 else "category"
        output_feat = {"name": LABEL_COLUMN_NAME, "type": output_type}
        if output_type == "binary" and config_params.get("threshold") is not None:
            output_feat["threshold"] = float(config_params["threshold"])
        val_metric = None

    # Key insertion order is kept deliberately: the YAML is dumped with
    # sort_keys=False, so it determines the layout of the output.
    trainer_cfg: Dict[str, Any] = {
        "epochs": epochs,
        "early_stop": early_stop,
        "batch_size": batch_size_cfg,
        "learning_rate": learning_rate,
    }
    # only set validation_metric for regression
    if val_metric:
        trainer_cfg["validation_metric"] = val_metric

    preprocessing_cfg: Dict[str, Any] = {
        "split": split_config,
        "num_processes": num_processes,
        "in_memory": False,
    }

    conf: Dict[str, Any] = {
        "model_type": "ecd",
        "input_features": [image_feat],
        "output_features": [output_feat],
        "combiner": {"type": "concat"},
        "trainer": trainer_cfg,
        "preprocessing": preprocessing_cfg,
    }

    logger.debug("LudwigDirectBackend: Config dict built.")
    try:
        yaml_text = yaml.dump(conf, sort_keys=False, indent=2)
        logger.info("LudwigDirectBackend: YAML config generated.")
    except Exception:
        logger.error(
            "LudwigDirectBackend: Failed to serialize YAML.",
            exc_info=True,
        )
        raise
    return yaml_text
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 727 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def run_experiment(
    self,
    dataset_path: Path,
    config_path: Path,
    output_dir: Path,
    random_seed: int = 42,
) -> None:
    """Run a Ludwig experiment through ``ludwig.experiment.experiment_cli``.

    Args:
        dataset_path: Dataset location, passed to Ludwig as a string.
        config_path: Config file location, passed to Ludwig as a string.
        output_dir: Results directory; created (with parents) if missing.
        random_seed: Seed forwarded to ``experiment_cli``.

    Raises:
        RuntimeError: If ``experiment_cli`` cannot be imported, or if the
            call raises ``TypeError`` (reported as an argument mismatch).
        Exception: Any other failure during the run is logged and re-raised.
    """
    logger.info("LudwigDirectBackend: Starting experiment execution.")

    # Imported here so a missing Ludwig install surfaces as RuntimeError.
    try:
        from ludwig.experiment import experiment_cli
    except ImportError as import_err:
        logger.error(
            "LudwigDirectBackend: Could not import experiment_cli.",
            exc_info=True,
        )
        raise RuntimeError("Ludwig import failed.") from import_err

    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        cli_kwargs = {
            "dataset": str(dataset_path),
            "config": str(config_path),
            "output_directory": str(output_dir),
            "random_seed": random_seed,
        }
        experiment_cli(**cli_kwargs)
        logger.info(
            f"LudwigDirectBackend: Experiment completed. Results in {output_dir}"
        )
    except TypeError as type_err:
        logger.error(
            "LudwigDirectBackend: Argument mismatch in experiment_cli call.",
            exc_info=True,
        )
        raise RuntimeError("Ludwig argument error.") from type_err
    except Exception:
        logger.error(
            "LudwigDirectBackend: Experiment execution error.",
            exc_info=True,
        )
        raise
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 771 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
def get_training_process(self, output_dir) -> Optional[Dict[str, Any]]:
    """Return training hyperparameters recorded by the latest Ludwig run.

    Searches *output_dir* for ``experiment_run*`` directories, selects the
    one with the most recent modification time and reads its
    ``model/training_progress.json`` file.

    Args:
        output_dir: Directory (``str`` or ``Path``) containing the
            ``experiment_run*`` folders.

    Returns:
        A dict with the keys ``learning_rate``, ``batch_size`` and
        ``epoch`` (a value is ``None`` when the key is absent from the
        file). ``None`` when no run directory or no progress file is
        found. An empty dict when the progress file exists but cannot be
        read or parsed.
    """
    output_dir = Path(output_dir)
    # Only directories can be experiment runs: a stray file that matches
    # the pattern (e.g. a log) must never be selected as the latest run.
    exp_dirs = sorted(
        (p for p in output_dir.glob("experiment_run*") if p.is_dir()),
        key=lambda p: p.stat().st_mtime,
    )

    if not exp_dirs:
        logger.warning(f"No experiment run directories found in {output_dir}")
        return None

    # sorted() is ascending by mtime, so the newest run is the last entry.
    progress_file = exp_dirs[-1] / "model" / "training_progress.json"
    if not progress_file.exists():
        logger.warning(f"No training_progress.json found in {progress_file}")
        return None

    try:
        with progress_file.open("r", encoding="utf-8") as f:
            data = json.load(f)
        return {
            "learning_rate": data.get("learning_rate"),
            "batch_size": data.get("batch_size"),
            "epoch": data.get("epoch"),
        }
    except Exception as e:
        # Deliberately best-effort: an unreadable or malformed progress file
        # is reported and yields an empty dict instead of raising.
        logger.warning(f"Failed to read training progress info: {e}")
        return {}
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 800 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def convert_parquet_to_csv(self, output_dir: Path):
    """Write a CSV copy of the latest run's predictions Parquet file.

    Selects the most recently modified ``experiment_run*`` directory under
    *output_dir*, reads ``PREDICTIONS_PARQUET_FILE_NAME`` from it and writes
    ``predictions.csv`` (without the index column) next to it.

    Conversion failures, including a missing Parquet file, are logged as
    errors and not raised, so callers are never interrupted by this step.

    Args:
        output_dir: Directory (``str`` or ``Path``) containing the
            ``experiment_run*`` folders.
    """
    output_dir = Path(output_dir)
    # Only directories can be experiment runs: a stray file that matches
    # the pattern must never be selected as the latest run.
    exp_dirs = sorted(
        (p for p in output_dir.glob("experiment_run*") if p.is_dir()),
        key=lambda p: p.stat().st_mtime,
    )
    if not exp_dirs:
        logger.warning(f"No experiment run dirs found in {output_dir}")
        return

    # sorted() is ascending by mtime, so the newest run is the last entry.
    exp_dir = exp_dirs[-1]
    parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME
    csv_path = exp_dir / "predictions.csv"
    try:
        df = pd.read_parquet(parquet_path)
        df.to_csv(csv_path, index=False)
        logger.info(f"Converted Parquet to CSV: {csv_path}")
    except Exception as e:
        # Best-effort step: report the problem and carry on.
        logger.error(f"Error converting Parquet to CSV: {e}")
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 820 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def generate_plots(self, output_dir: Path) -> None:
    """Generate the supported Ludwig visualizations for the latest run.

    Selects the most recently modified ``experiment_run*`` directory under
    *output_dir* and creates ``visualizations/train`` and
    ``visualizations/test`` inside it. Every visualization in Ludwig's
    registry whose name is listed in ``train_plots`` or ``test_plots`` is
    then called; other registry entries are ignored.

    The routine is best-effort. Input files that are missing are passed to
    the visualization functions as ``None`` or empty lists, unreadable JSON
    inputs are reported with a warning, and a visualization that raises is
    logged and skipped without stopping the remaining ones.

    Args:
        output_dir: Directory (``str`` or ``Path``) containing the
            ``experiment_run*`` folders.
    """
    logger.info("Generating all Ludwig visualizations…")

    # Registry names rendered into the "test" sub-directory.
    test_plots = {
        "compare_performance",
        "compare_classifiers_performance_from_prob",
        "compare_classifiers_performance_from_pred",
        "compare_classifiers_performance_changing_k",
        "compare_classifiers_multiclass_multimetric",
        "compare_classifiers_predictions",
        "confidence_thresholding_2thresholds_2d",
        "confidence_thresholding_2thresholds_3d",
        "confidence_thresholding",
        "confidence_thresholding_data_vs_acc",
        "binary_threshold_vs_metric",
        "roc_curves",
        "roc_curves_from_test_statistics",
        "calibration_1_vs_all",
        "calibration_multiclass",
        "confusion_matrix",
        "frequency_vs_f1",
    }
    # Registry names rendered into the "train" sub-directory.
    train_plots = {
        "learning_curves",
        "compare_classifiers_performance_subset",
    }

    output_dir = Path(output_dir)
    # Only directories can be experiment runs: selecting a stray file that
    # matches the pattern would make the mkdir calls below fail.
    exp_dirs = sorted(
        (p for p in output_dir.glob("experiment_run*") if p.is_dir()),
        key=lambda p: p.stat().st_mtime,
    )
    if not exp_dirs:
        logger.warning(f"No experiment run dirs found in {output_dir}")
        return
    # sorted() is ascending by mtime, so the newest run is the last entry.
    exp_dir = exp_dirs[-1]

    viz_dir = exp_dir / "visualizations"
    train_viz = viz_dir / "train"
    test_viz = viz_dir / "test"
    # parents=True also creates viz_dir itself when it is missing.
    train_viz.mkdir(parents=True, exist_ok=True)
    test_viz.mkdir(parents=True, exist_ok=True)

    def _check(p: Path) -> Optional[str]:
        """Return *p* as a string when it exists on disk, else ``None``."""
        return str(p) if p.exists() else None

    training_stats = _check(exp_dir / "training_statistics.json")
    test_stats = _check(exp_dir / TEST_STATISTICS_FILE_NAME)
    probs_path = _check(exp_dir / PREDICTIONS_PARQUET_FILE_NAME)
    gt_metadata = _check(exp_dir / "model" / TRAIN_SET_METADATA_FILE_NAME)

    # Load the run description once; a missing or malformed file leaves an
    # empty mapping so plotting can still proceed with what is available.
    cfg: Dict[str, Any] = {}
    desc = exp_dir / DESCRIPTION_FILE_NAME
    if desc.exists():
        try:
            with open(desc, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            logger.warning(f"Could not read {desc}: {e}")
        else:
            if isinstance(loaded, dict):
                cfg = loaded

    dataset_path = None
    split_file = None
    dataset = cfg.get("dataset")
    # Path("") resolves to the current directory, which always exists, and
    # Path(None) raises TypeError: only a non-empty string is a usable path.
    if isinstance(dataset, str) and dataset:
        dataset_path = _check(Path(dataset))
        split_file = _check(Path(get_split_path(dataset)))

    output_feature = ""
    try:
        output_feature = cfg["config"]["output_features"][0]["name"]
    except (KeyError, IndexError, TypeError):
        # Description absent or shaped differently: use the fallback below.
        pass
    if not output_feature and test_stats:
        # Fall back to the first top-level key of the test statistics.
        try:
            with open(test_stats, "r", encoding="utf-8") as f:
                stats = json.load(f)
        except (OSError, ValueError) as e:
            logger.warning(f"Could not read {test_stats}: {e}")
        else:
            if isinstance(stats, dict):
                output_feature = next(iter(stats), "")

    viz_registry = get_visualizations_registry()
    for viz_name, viz_func in viz_registry.items():
        if viz_name in train_plots:
            viz_dir_plot = train_viz
        elif viz_name in test_plots:
            viz_dir_plot = test_viz
        else:
            continue

        try:
            viz_func(
                training_statistics=[training_stats] if training_stats else [],
                test_statistics=[test_stats] if test_stats else [],
                probabilities=[probs_path] if probs_path else [],
                output_feature_name=output_feature,
                ground_truth_split=2,
                top_n_classes=[0],
                top_k=3,
                ground_truth_metadata=gt_metadata,
                ground_truth=dataset_path,
                split_file=split_file,
                output_directory=str(viz_dir_plot),
                normalize=False,
                file_format="png",
            )
            logger.info(f"✔ Generated {viz_name}")
        except Exception as e:
            # A failing plot must not stop the remaining ones.
            logger.warning(f"✘ Skipped {viz_name}: {e}")

    logger.info(f"All visualizations written to {viz_dir}")
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 924 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 925 def generate_html_report( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 926 self, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 927 title: str, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 928 output_dir: str, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 929 config: dict, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 930 split_info: str, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 931 ) -> Path: | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 932 """Assemble an HTML report from visualizations under train_val/ and test/ folders.""" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 933 cwd = Path.cwd() | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 934 report_name = title.lower().replace(" ", "_") + "_report.html" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 935 report_path = cwd / report_name | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 936 output_dir = Path(output_dir) | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 937 output_type = None | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 938 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 939 exp_dirs = sorted( | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 940 output_dir.glob("experiment_run*"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 941 key=lambda p: p.stat().st_mtime, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 942 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 943 if not exp_dirs: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 944 raise RuntimeError(f"No 'experiment*' dirs found in {output_dir}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 945 exp_dir = exp_dirs[-1] | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 946 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 947 base_viz_dir = exp_dir / "visualizations" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 948 train_viz_dir = base_viz_dir / "train" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 949 test_viz_dir = base_viz_dir / "test" | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 950 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 951 html = get_html_template() | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 952 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 953 # Extra CSS & JS: center Plotly and enable CSV download for predictions table | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 954 html += """ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 955 <style> | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 956 /* Center Plotly figures (both wrapper and native classes) */ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 957 .plotly-center { display: flex; justify-content: center; } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 958 .plotly-center .plotly-graph-div, .plotly-center .js-plotly-plot { margin: 0 auto !important; } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 959 .js-plotly-plot, .plotly-graph-div { margin-left: auto !important; margin-right: auto !important; } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 960 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 961 /* Download button for predictions table */ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 962 .download-btn { | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 963 padding: 8px 12px; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 964 border: 1px solid #4CAF50; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 965 background: #4CAF50; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 966 color: white; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 967 border-radius: 6px; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 968 cursor: pointer; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 969 } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 970 .download-btn:hover { filter: brightness(0.95); } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 971 .preds-controls { | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 972 display: flex; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 973 justify-content: flex-end; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 974 gap: 8px; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 975 margin: 8px 0; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 976 } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 977 </style> | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 978 <script> | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 979 function tableToCSV(table){ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 980 const rows = Array.from(table.querySelectorAll('tr')); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 981 return rows.map(row => | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 982 Array.from(row.querySelectorAll('th,td')).map(cell => { | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 983 let text = cell.innerText.replace(/\\r?\\n|\\r/g,' ').trim(); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 984 if (text.includes('"') || text.includes(',')) { | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 985 text = '"' + text.replace(/"/g,'""') + '"'; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 986 } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 987 return text; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 988 }).join(',') | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 989 ).join('\\n'); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 990 } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 991 document.addEventListener('DOMContentLoaded', function(){ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 992 const btn = document.getElementById('downloadPredsCsv'); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 993 if(btn){ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 994 btn.addEventListener('click', function(){ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 995 const tbl = document.querySelector('.predictions-table'); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 996 if(!tbl){ alert('Predictions table not found.'); return; } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 997 const csv = tableToCSV(tbl); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 998 const blob = new Blob([csv], {type: 'text/csv;charset=utf-8;'}); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 999 const url = URL.createObjectURL(blob); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1000 const a = document.createElement('a'); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1001 a.href = url; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1002 a.download = 'ground_truth_vs_predictions.csv'; | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1003 document.body.appendChild(a); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1004 a.click(); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1005 document.body.removeChild(a); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1006 URL.revokeObjectURL(url); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1007 }); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1008 } | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1009 }); | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1010 </script> | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1011 """ | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1012 html += f"<h1>{title}</h1>" | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1013 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1014 metrics_html = "" | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1015 train_val_metrics_html = "" | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1016 test_metrics_html = "" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1017 try: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1018 train_stats_path = exp_dir / "training_statistics.json" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1019 test_stats_path = exp_dir / TEST_STATISTICS_FILE_NAME | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1020 if train_stats_path.exists() and test_stats_path.exists(): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1021 with open(train_stats_path) as f: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1022 train_stats = json.load(f) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1023 with open(test_stats_path) as f: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1024 test_stats = json.load(f) | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1025 output_type = detect_output_type(test_stats) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1026 metrics_html = format_stats_table_html(train_stats, test_stats) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1027 train_val_metrics_html = format_train_val_stats_table_html( | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1028 train_stats, test_stats | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1029 ) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1030 test_metrics_html = format_test_merged_stats_table_html( | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1031 extract_metrics_from_json(train_stats, test_stats, output_type)[ | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1032 "test" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1033 ] | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1034 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1035 except Exception as e: | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1036 logger.warning( | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1037 f"Could not load stats for HTML report: {type(e).__name__}: {e}" | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1038 ) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1039 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1040 config_html = "" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1041 training_progress = self.get_training_process(output_dir) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1042 try: | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1043 config_html = format_config_table_html( | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1044 config, split_info, training_progress, output_type | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1045 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1046 except Exception as e: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1047 logger.warning(f"Could not load config for HTML report: {e}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1048 | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1049 # ---------- image rendering with exclusions ---------- | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1050 def render_img_section( | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1051 title: str, | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1052 dir_path: Path, | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1053 output_type: str = None, | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1054 exclude_names: Optional[set] = None, | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1055 ) -> str: | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1056 if not dir_path.exists(): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1057 return f"<h2>{title}</h2><p><em>Directory not found.</em></p>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1058 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1059 exclude_names = exclude_names or set() | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1060 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1061 imgs = list(dir_path.glob("*.png")) | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1062 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1063 default_exclude = {"confusion_matrix.png", "roc_curves.png"} | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1064 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1065 imgs = [ | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1066 img | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1067 for img in imgs | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1068 if img.name not in default_exclude | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1069 and img.name not in exclude_names | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1070 and not img.name.startswith("confusion_matrix__label_top") | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1071 ] | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1072 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1073 if not imgs: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1074 return f"<h2>{title}</h2><p><em>No plots found.</em></p>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1075 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1076 if output_type == "binary": | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1077 order = [ | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1078 "roc_curves_from_prediction_statistics.png", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1079 "compare_performance_label.png", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1080 "confusion_matrix_entropy__label_top2.png", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1081 ] | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1082 img_names = {img.name: img for img in imgs} | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1083 ordered = [img_names[n] for n in order if n in img_names] | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1084 others = sorted(img for img in imgs if img.name not in order) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1085 imgs = ordered + others | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1086 elif output_type == "category": | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1087 unwanted = { | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1088 "compare_classifiers_multiclass_multimetric__label_best10.png", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1089 "compare_classifiers_multiclass_multimetric__label_top10.png", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1090 "compare_classifiers_multiclass_multimetric__label_worst10.png", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1091 } | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1092 valid_imgs = [img for img in imgs if img.name not in unwanted] | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1093 display_order = [ | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1094 "roc_curves.png", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1095 "compare_performance_label.png", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1096 "compare_classifiers_performance_from_prob.png", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1097 "confusion_matrix_entropy__label_top10.png", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1098 ] | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1099 img_map = {img.name: img for img in valid_imgs} | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1100 ordered = [img_map[n] for n in display_order if n in img_map] | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1101 others = sorted( | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1102 img for img in valid_imgs if img.name not in display_order | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1103 ) | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1104 imgs = ordered + others | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1105 else: | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1106 imgs = sorted(imgs) | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1107 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1108 html_section = "" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1109 for img in imgs: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1110 b64 = encode_image_to_base64(str(img)) | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1111 img_title = img.stem.replace("_", " ").title() | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1112 html_section += ( | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1113 f"<h2 style='text-align: center;'>{img_title}</h2>" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1114 f'<div class="plot" style="margin-bottom:20px;text-align:center;">' | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1115 f'<img src="data:image/png;base64,{b64}" ' | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1116 f'style="max-width:90%;max-height:600px;border:1px solid #ddd;" />' | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1117 f"</div>" | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1118 ) | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1119 return html_section | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1120 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1121 tab1_content = config_html + metrics_html | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1122 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1123 tab2_content = train_val_metrics_html + render_img_section( | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1124 "Training and Validation Visualizations", | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1125 train_viz_dir, | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1126 output_type, | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1127 exclude_names={ | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1128 "compare_classifiers_performance_from_prob.png", | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1129 "roc_curves_from_prediction_statistics.png", | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1130 "precision_recall_curves_from_prediction_statistics.png", | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1131 "precision_recall_curve.png", | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1132 }, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1133 ) | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1134 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1135 # --- Predictions vs Ground Truth table (REGRESSION ONLY) --- | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1136 preds_section = "" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1137 parquet_path = exp_dir / PREDICTIONS_PARQUET_FILE_NAME | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1138 if output_type == "regression" and parquet_path.exists(): | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1139 try: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1140 # 1) load predictions from Parquet | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1141 df_preds = pd.read_parquet(parquet_path).reset_index(drop=True) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1142 # assume the column containing your model's prediction is named "prediction" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1143 pred_col = next( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1144 (c for c in df_preds.columns if "prediction" in c.lower()), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1145 None, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1146 ) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1147 if pred_col is None: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1148 raise ValueError("No prediction column found in Parquet output") | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1149 df_pred = df_preds[[pred_col]].rename(columns={pred_col: "prediction"}) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1150 # 2) load ground truth for the test split from prepared CSV | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1151 df_all = pd.read_csv(config["label_column_data_path"]) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1152 df_gt = df_all[df_all[SPLIT_COLUMN_NAME] == 2][ | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1153 LABEL_COLUMN_NAME | 
| 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1154 ].reset_index(drop=True) | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1155 # 3) concatenate side-by-side | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1156 df_table = pd.concat([df_gt, df_pred], axis=1) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1157 df_table.columns = [LABEL_COLUMN_NAME, "prediction"] | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1158 # 4) render as HTML | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1159 preds_html = df_table.to_html(index=False, classes="predictions-table") | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1160 preds_section = ( | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1161 "<h2 style='text-align: center;'>Ground Truth vs. Predictions</h2>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1162 "<div class='preds-controls'>" | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1163 "<button id='downloadPredsCsv' class='download-btn'>Download CSV</button>" | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1164 "</div>" | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1165 "<div class='scroll-rows-30' style='overflow-x:auto; overflow-y:auto; max-height:900px; margin-bottom:20px;'>" | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1166 + preds_html | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1167 + "</div>" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1168 ) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1169 except Exception as e: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1170 logger.warning(f"Could not build Predictions vs GT table: {e}") | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1171 | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1172 tab3_content = test_metrics_html + preds_section | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1173 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1174 # Classification-only interactive Plotly panels (centered) | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1175 if output_type in ("binary", "category"): | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1176 training_stats_path = exp_dir / "training_statistics.json" | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1177 interactive_plots = build_classification_plots( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1178 str(test_stats_path), | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1179 str(training_stats_path), | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1180 ) | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1181 for plot in interactive_plots: | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1182 tab3_content += ( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1183 f"<h2 style='text-align: center;'>{plot['title']}</h2>" | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1184 f"<div class='plotly-center'>{plot['html']}</div>" | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1185 ) | 
| 10 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1186 | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1187 # Add static TEST PNGs (with default dedupe/exclusions) | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1188 tab3_content += render_img_section( | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1189 "Test Visualizations", test_viz_dir, output_type | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1190 ) | 
| 
b0d893d04d4c
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1594d503179f28987720594eb49b48a15486f073
 goeckslab parents: 
9diff
changeset | 1191 | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1192 tabbed_html = build_tabbed_html(tab1_content, tab2_content, tab3_content) | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1193 modal_html = get_metrics_help_modal() | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1194 html += tabbed_html + modal_html + get_html_closing() | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1195 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1196 try: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1197 with open(report_path, "w") as f: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1198 f.write(html) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1199 logger.info(f"HTML report generated at: {report_path}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1200 except Exception as e: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1201 logger.error(f"Failed to write HTML report: {e}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1202 raise | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1203 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1204 return report_path | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1205 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1206 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1207 class WorkflowOrchestrator: | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1208 """Manages the image-classification workflow.""" | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1209 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def __init__(self, args: argparse.Namespace, backend: Backend):
    """Store the parsed CLI arguments and the backend for later steps.

    Args:
        args: Parsed command-line namespace kept on the instance.
        backend: Backend object kept on the instance; its class name is
            written to the log.
    """
    # Working directories start unset and are assigned elsewhere.
    self.temp_dir: Optional[Path] = None
    self.image_extract_dir: Optional[Path] = None

    self.backend = backend
    self.args = args

    backend_name = type(backend).__name__
    logger.info(f"Orchestrator initialized with backend: {backend_name}")
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1216 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def _create_temp_dirs(self) -> None:
    """Create the temporary working directory and its ``images`` subfolder.

    A fresh directory named with ``TEMP_DIR_PREFIX`` is created inside
    ``self.args.output_dir`` and stored on ``self.temp_dir``; an ``images``
    directory is then created beneath it and stored on
    ``self.image_extract_dir``.

    Raises:
        Exception: Any error hit while creating the directories is logged
            with its traceback and then re-raised unchanged.
    """
    try:
        created = tempfile.mkdtemp(
            dir=self.args.output_dir,
            prefix=TEMP_DIR_PREFIX,
        )
        self.temp_dir = Path(created)

        extract_dir = self.temp_dir / "images"
        self.image_extract_dir = extract_dir
        extract_dir.mkdir()

        logger.info(f"Created temp directory: {self.temp_dir}")
    except Exception:
        # Same effect as logger.error(..., exc_info=True): ERROR level
        # with the active traceback attached.
        logger.exception("Failed to create temporary directories")
        raise
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1229 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def _extract_images(self) -> None:
    """Unpack the image archive into the temporary image directory.

    Every member of the ZIP at ``self.args.image_zip`` is extracted into
    ``self.image_extract_dir``.

    Raises:
        RuntimeError: If ``self.image_extract_dir`` is still ``None``.
        Exception: Any error raised while opening or extracting the archive;
            it is logged with its traceback and re-raised unchanged.
    """
    target_dir = self.image_extract_dir
    if target_dir is None:
        raise RuntimeError("Temp image directory not initialized.")

    archive_path = self.args.image_zip
    logger.info(f"Extracting images from {archive_path} → {target_dir}")
    try:
        with zipfile.ZipFile(archive_path, "r") as archive:
            archive.extractall(target_dir)
        logger.info("Image extraction complete.")
    except Exception:
        logger.error("Error extracting zip file", exc_info=True)
        raise
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1244 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
def _prepare_data(self) -> Tuple[Path, Dict[str, Any], str]:
    """Load the input CSV, rewrite image paths, assign splits, and save it.

    Steps, in order:
      1. Read ``self.args.csv_file`` into a DataFrame.
      2. Check that the image-path and label columns are present.
      3. Rewrite every image path to a resolved path under
         ``self.image_extract_dir``.
      4. If the split column exists, delegate to ``_process_fixed_split``;
         otherwise call ``create_stratified_random_split`` with
         ``self.args.split_probabilities`` and ``self.args.random_seed``.
      5. Write the result to ``self.temp_dir / TEMP_CSV_FILENAME``.

    Returns:
        A tuple ``(final_csv, split_config, split_info)``: the path of the
        prepared CSV, the split configuration dict, and a human-readable
        description of how the split was obtained.

    Raises:
        RuntimeError: If the temp directories have not been created yet.
        ValueError: If a required column is missing from the CSV.
        Exception: Errors from reading, path rewriting, or writing are logged
            with their traceback and re-raised unchanged.
    """
    if not self.temp_dir or not self.image_extract_dir:
        raise RuntimeError("Temp dirs not initialized before data prep.")

    try:
        df = pd.read_csv(self.args.csv_file)
        logger.info(f"Loaded CSV: {self.args.csv_file}")
    except Exception:
        logger.error("Error loading CSV file", exc_info=True)
        raise

    required = {IMAGE_PATH_COLUMN_NAME, LABEL_COLUMN_NAME}
    missing = required - set(df.columns)
    if missing:
        # Sets are unordered; sort so the message is the same on every run.
        raise ValueError(f"Missing CSV columns: {', '.join(sorted(missing))}")

    try:
        df[IMAGE_PATH_COLUMN_NAME] = df[IMAGE_PATH_COLUMN_NAME].apply(
            lambda p: str((self.image_extract_dir / p).resolve())
        )
    except Exception:
        logger.error("Error updating image paths", exc_info=True)
        raise

    if SPLIT_COLUMN_NAME in df.columns:
        df, split_config, split_info = self._process_fixed_split(df)
    else:
        logger.info("No split column; creating stratified random split")
        df = create_stratified_random_split(
            df=df,
            split_column=SPLIT_COLUMN_NAME,
            split_probabilities=self.args.split_probabilities,
            random_state=self.args.random_seed,
            label_column=LABEL_COLUMN_NAME,
        )
        split_config = {
            "type": "fixed",
            "column": SPLIT_COLUMN_NAME,
        }
        # round(), not int(): p * 100 can land just below the intended value
        # in binary floating point (0.29 * 100 == 28.999999999999996), and
        # int() would truncate that to 28 and report the wrong percentage.
        percentages = [round(p * 100) for p in self.args.split_probabilities]
        split_info = (
            "No split column in CSV. Created stratified random split: "
            f"{percentages}% "
            "for train/val/test with balanced label distribution."
        )

    final_csv = self.temp_dir / TEMP_CSV_FILENAME

    try:
        df.to_csv(final_csv, index=False)
        logger.info(f"Saved prepared data to {final_csv}")
    except Exception:
        logger.error("Error saving prepared CSV", exc_info=True)
        raise

    return final_csv, split_config, split_info
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1300 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
def _process_fixed_split(
    self, df: pd.DataFrame
) -> Tuple[pd.DataFrame, Dict[str, Any], str]:
    """Validate and apply a split column already present in the CSV.

    The split column is coerced to nullable integers (0=train, 1=val,
    2=test); values that cannot be parsed become missing and trigger a
    warning. If the column holds exactly the values 0 and 2,
    ``split_data_0_2`` is called with ``self.args.validation_size`` and
    ``self.args.random_seed``. Any other subset of {0, 1, 2} is used as
    given.

    Args:
        df: DataFrame containing the split column; the column is overwritten
            in place with its integer-coerced form.

    Returns:
        A tuple ``(df, split_config, split_info)``: the (possibly re-split)
        DataFrame, the split configuration dict, and a human-readable
        description of what was done.

    Raises:
        ValueError: If the column contains values outside {0, 1, 2}.
        Exception: Every error, including the ``ValueError`` above, is logged
            with its traceback and re-raised unchanged.
    """
    logger.info(f"Fixed split column '{SPLIT_COLUMN_NAME}' detected.")
    try:
        numeric_split = pd.to_numeric(df[SPLIT_COLUMN_NAME], errors="coerce")
        df[SPLIT_COLUMN_NAME] = numeric_split.astype(pd.Int64Dtype())
        split_values = df[SPLIT_COLUMN_NAME]
        if split_values.isna().any():
            logger.warning("Split column contains non-numeric/missing values.")

        observed = set(split_values.dropna().unique())
        logger.info(f"Unique split values: {observed}")

        # Reject unknown labels first; {0, 2} is itself a subset of
        # {0, 1, 2}, so the remaining cases are only "0/2" and "use as-is".
        if not observed.issubset({0, 1, 2}):
            raise ValueError(f"Unexpected split values: {observed}")

        if observed == {0, 2}:
            df = split_data_0_2(
                df,
                SPLIT_COLUMN_NAME,
                validation_size=self.args.validation_size,
                label_column=LABEL_COLUMN_NAME,
                random_state=self.args.random_seed,
            )
            split_info = (
                "Detected a split column (with values 0 and 2) in the input CSV. "
                "Used this column as a base and reassigned "
                f"{self.args.validation_size * 100:.1f}% "
                "of the training set (originally labeled 0) to validation (labeled 1) using stratified sampling."
            )
            logger.info("Applied custom 0/2 split.")
        else:
            split_info = "Used user-defined split column from CSV."
            logger.info("Using fixed split as-is.")

        split_config = {"type": "fixed", "column": SPLIT_COLUMN_NAME}
        return df, split_config, split_info

    except Exception:
        logger.error("Error processing fixed split", exc_info=True)
        raise
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1342 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def _cleanup_temp_dirs(self) -> None:
    """Delete the temporary workspace and forget its paths.

    If ``self.temp_dir`` is set and exists on disk, it is removed
    recursively, ignoring removal errors. Both ``self.temp_dir`` and
    ``self.image_extract_dir`` are then reset to ``None``.
    """
    # NOTE(review): the resets are assumed to run unconditionally (after the
    # ``if``); confirm against the original indentation.
    scratch_root = self.temp_dir
    if scratch_root and scratch_root.exists():
        logger.info(f"Cleaning up temp directory: {scratch_root}")
        shutil.rmtree(scratch_root, ignore_errors=True)
    self.temp_dir = None
    self.image_extract_dir = None
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1349 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1350 def run(self) -> None: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1351 """Execute the full workflow end-to-end.""" | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1352 logger.info("Starting workflow...") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1353 self.args.output_dir.mkdir(parents=True, exist_ok=True) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1354 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1355 try: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1356 self._create_temp_dirs() | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1357 self._extract_images() | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1358 csv_path, split_cfg, split_info = self._prepare_data() | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1359 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1360 use_pretrained = self.args.use_pretrained or self.args.fine_tune | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1361 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1362 backend_args = { | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1363 "model_name": self.args.model_name, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1364 "fine_tune": self.args.fine_tune, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1365 "use_pretrained": use_pretrained, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1366 "epochs": self.args.epochs, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1367 "batch_size": self.args.batch_size, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1368 "preprocessing_num_processes": self.args.preprocessing_num_processes, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1369 "split_probabilities": self.args.split_probabilities, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1370 "learning_rate": self.args.learning_rate, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1371 "random_seed": self.args.random_seed, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1372 "early_stop": self.args.early_stop, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1373 "label_column_data_path": csv_path, | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1374 "augmentation": self.args.augmentation, | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1375 "threshold": self.args.threshold, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1376 } | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1377 yaml_str = self.backend.prepare_config(backend_args, split_cfg) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1378 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1379 config_file = self.temp_dir / TEMP_CONFIG_FILENAME | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1380 config_file.write_text(yaml_str) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1381 logger.info(f"Wrote backend config: {config_file}") | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1382 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1383 self.backend.run_experiment( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1384 csv_path, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1385 config_file, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1386 self.args.output_dir, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1387 self.args.random_seed, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1388 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1389 logger.info("Workflow completed successfully.") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1390 self.backend.generate_plots(self.args.output_dir) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1391 report_file = self.backend.generate_html_report( | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1392 "Image Classification Results", | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1393 self.args.output_dir, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1394 backend_args, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1395 split_info, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1396 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1397 logger.info(f"HTML report generated at: {report_file}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1398 self.backend.convert_parquet_to_csv(self.args.output_dir) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1399 logger.info("Converted Parquet to CSV.") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1400 except Exception: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1401 logger.error("Workflow execution failed", exc_info=True) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1402 raise | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1403 finally: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1404 self._cleanup_temp_dirs() | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1405 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1406 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
def parse_learning_rate(s):
    """Convert a command-line learning-rate value to ``float``.

    Returns the parsed float, or ``None`` when ``s`` cannot be converted
    (for example an empty string, a non-numeric word, or ``None`` itself).
    """
    try:
        rate = float(s)
    except (TypeError, ValueError):
        # Unparsable input is not an error here; the caller receives None.
        rate = None
    return rate
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1412 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1413 | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
def aug_parse(aug_string: str):
    """
    Parse comma-separated augmentation keys into Ludwig augmentation dicts.

    Whitespace around each key is ignored, and blank tokens (produced by a
    trailing or doubled comma) are skipped. The result keeps the order of
    the keys in ``aug_string``; an input with no keys yields an empty list.

    Each returned entry is an independent dict, so repeating a key never
    yields the same dict object twice.

    Raises ValueError on unknown key.
    """
    mapping = {
        "random_horizontal_flip": {"type": "random_horizontal_flip"},
        "random_vertical_flip": {"type": "random_vertical_flip"},
        "random_rotate": {"type": "random_rotate", "degree": 10},
        "random_blur": {"type": "random_blur", "kernel_size": 3},
        "random_brightness": {"type": "random_brightness", "min": 0.5, "max": 2.0},
        "random_contrast": {"type": "random_contrast", "min": 0.5, "max": 2.0},
    }
    aug_list = []
    for tok in aug_string.split(","):
        key = tok.strip()
        if not key:
            # Blank token from a trailing/doubled comma: nothing was requested,
            # so skip it instead of reporting "Unknown augmentation ''".
            continue
        if key not in mapping:
            valid = ", ".join(mapping.keys())
            raise ValueError(f"Unknown augmentation '{key}'. Valid choices: {valid}")
        # Append a copy so repeated keys do not alias one shared dict object.
        aug_list.append(dict(mapping[key]))
    return aug_list
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1435 | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1436 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
class SplitProbAction(argparse.Action):
    """argparse action that validates a (train, val, test) split triple.

    The three values must each lie in the closed range [0.0, 1.0] and must
    sum to 1.0 (within 1e-6). On failure ``parser.error`` is called with a
    message describing the problem; on success the values are stored on the
    namespace under ``self.dest`` unchanged.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        train, val, test = values
        total = train + val + test
        # Chained comparisons are False for NaN, so NaN is rejected here too.
        if not all(0.0 <= p <= 1.0 for p in values):
            parser.error(
                f"--split-probabilities values must each be between 0.0 and 1.0; "
                f"got {train} {val} {test}"
            )
        # Written as `not (... <= tol)` rather than `... > tol` so that a
        # NaN total can never slip through the comparison.
        if not abs(total - 1.0) <= 1e-6:
            parser.error(
                f"--split-probabilities must sum to 1.0; "
                f"got {train:.3f} + {val:.3f} + {test:.3f} = {total:.3f}"
            )
        setattr(namespace, self.dest, values)
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1447 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1448 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1449 def main(): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1450 parser = argparse.ArgumentParser( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1451 description="Image Classification Learner with Pluggable Backends", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1452 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1453 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1454 "--csv-file", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1455 required=True, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1456 type=Path, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1457 help="Path to the input CSV", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1458 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1459 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1460 "--image-zip", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1461 required=True, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1462 type=Path, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1463 help="Path to the images ZIP", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1464 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1465 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1466 "--model-name", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1467 required=True, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1468 choices=MODEL_ENCODER_TEMPLATES.keys(), | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1469 help="Which model template to use", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1470 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1471 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1472 "--use-pretrained", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1473 action="store_true", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1474 help="Use pretrained weights for the model", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1475 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1476 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1477 "--fine-tune", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1478 action="store_true", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1479 help="Enable fine-tuning", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1480 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1481 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1482 "--epochs", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1483 type=int, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1484 default=10, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1485 help="Number of training epochs", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1486 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1487 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1488 "--early-stop", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1489 type=int, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1490 default=5, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1491 help="Early stopping patience", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1492 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1493 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1494 "--batch-size", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1495 type=int, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1496 help="Batch size (None = auto)", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1497 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1498 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1499 "--output-dir", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1500 type=Path, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1501 default=Path("learner_output"), | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1502 help="Where to write outputs", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1503 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1504 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1505 "--validation-size", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1506 type=float, | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1507 default=0.15, | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1508 help="Fraction for validation (0.0–1.0)", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1509 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1510 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1511 "--preprocessing-num-processes", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1512 type=int, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1513 default=max(1, os.cpu_count() // 2), | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1514 help="CPU processes for data prep", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1515 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1516 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1517 "--split-probabilities", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1518 type=float, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1519 nargs=3, | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1520 metavar=("train", "val", "test"), | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1521 action=SplitProbAction, | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1522 default=[0.7, 0.1, 0.2], | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1523 help=( | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1524 "Random split proportions (e.g., 0.7 0.1 0.2).Only used if no split column." | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1525 ), | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1526 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1527 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1528 "--random-seed", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1529 type=int, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1530 default=42, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1531 help="Random seed used for dataset splitting (default: 42)", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1532 ) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1533 parser.add_argument( | 
| 1 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1534 "--learning-rate", | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1535 type=parse_learning_rate, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1536 default=None, | 
| 
39202fe5cf97
planemo upload for repository https://github.com/goeckslab/gleam.git commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
 goeckslab parents: 
0diff
changeset | 1537 help="Learning rate. If not provided, Ludwig will auto-select it.", | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1538 ) | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1539 parser.add_argument( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1540 "--augmentation", | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1541 type=str, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1542 default=None, | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1543 help=( | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1544 "Comma-separated list (in order) of any of: " | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1545 "random_horizontal_flip, random_vertical_flip, random_rotate, " | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1546 "random_blur, random_brightness, random_contrast. " | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1547 "E.g. --augmentation random_horizontal_flip,random_rotate" | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1548 ), | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1549 ) | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1550 parser.add_argument( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1551 "--threshold", | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1552 type=float, | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1553 default=None, | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1554 help=( | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1555 "Decision threshold for binary classification (0.0–1.0)." | 
| 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1556 "Overrides default 0.5." | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1557 ), | 
| 8 
85e6f4b2ad18
planemo upload for repository https://github.com/goeckslab/gleam.git commit 8a42eb9b33df7e1df5ad5153b380e20b910a05b6
 goeckslab parents: 
7diff
changeset | 1558 ) | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1559 args = parser.parse_args() | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1560 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1561 if not 0.0 <= args.validation_size <= 1.0: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1562 parser.error("validation-size must be between 0.0 and 1.0") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1563 if not args.csv_file.is_file(): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1564 parser.error(f"CSV not found: {args.csv_file}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1565 if not args.image_zip.is_file(): | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1566 parser.error(f"ZIP not found: {args.image_zip}") | 
| 2 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1567 if args.augmentation is not None: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1568 try: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1569 augmentation_setup = aug_parse(args.augmentation) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1570 setattr(args, "augmentation", augmentation_setup) | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1571 except ValueError as e: | 
| 
186424a7eca7
planemo upload for repository https://github.com/goeckslab/gleam.git commit 91fa4aba245520fc0680088a07cead66bcfd4ed2
 goeckslab parents: 
1diff
changeset | 1572 parser.error(str(e)) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1573 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1574 backend_instance = LudwigDirectBackend() | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1575 orchestrator = WorkflowOrchestrator(args, backend_instance) | 
| 9 
9e912fce264c
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
 goeckslab parents: 
8diff
changeset | 1576 | 
| 0 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1577 exit_code = 0 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1578 try: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1579 orchestrator.run() | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1580 logger.info("Main script finished successfully.") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1581 except Exception as e: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1582 logger.error(f"Main script failed.{e}") | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1583 exit_code = 1 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1584 finally: | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1585 sys.exit(exit_code) | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1586 | 
| 
54b871dfc51e
planemo upload for repository https://github.com/goeckslab/gleam.git commit b7411ff35b6228ccdfd36cd4ebd946c03ac7f7e9
 goeckslab parents: diff
changeset | 1587 | 
if __name__ == "__main__":
    # Script entry point: confirm the Ludwig dependency can be imported
    # before handing control to main(); abort with exit status 1 otherwise.
    try:
        import ludwig

        # Resolved inside the try block, exactly where the original
        # evaluated it, so failure modes stay the same.
        ludwig_version = ludwig.globals.LUDWIG_VERSION
        logger.debug(f"Found Ludwig version: {ludwig_version}")
    except ImportError:
        missing_ludwig_msg = (
            "Ludwig library not found. Please ensure Ludwig is installed "
            "('pip install ludwig[image]')"
        )
        logger.error(missing_ludwig_msg)
        sys.exit(1)

    # Dependency check passed: run the command-line workflow.
    main()
