Mercurial > repos > goeckslab > image_learner
comparison html_structure.py @ 12:bcfa2e234a80 draft
planemo upload for repository https://github.com/goeckslab/gleam.git commit 96bab8325992d16fcaad8e0a4dc4c62b00e2abc2
| author | goeckslab |
|---|---|
| date | Fri, 21 Nov 2025 15:58:13 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 11:c5150cceab47 | 12:bcfa2e234a80 |
|---|---|
import base64
import html
import json
from typing import Any, Dict, Optional

from constants import METRIC_DISPLAY_NAMES
from utils import detect_output_type, extract_metrics_from_json
| 7 | |
| 8 | |
def generate_table_row(cells, styles):
    """Render one ``<tr>`` whose ``<td>`` cells all carry the same inline style.

    Each item of ``cells`` is interpolated as-is (no HTML escaping) into its
    own ``<td style='...'>`` element, in iteration order.
    """
    rendered_cells = [f"<td style='{styles}'>{content}</td>" for content in cells]
    return f"<tr>{''.join(rendered_cells)}</tr>"
| 16 | |
| 17 | |
def _config_table_row(label, value) -> str:
    """Render one Parameter/Value row of the configuration summary table."""
    return (
        "<tr>"
        "<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: left; "
        "white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>"
        f"{label}</td>"
        "<td style='padding: 6px 12px; border: 1px solid #ccc; text-align: center; "
        "white-space: normal; word-break: break-word; overflow-wrap: anywhere;'>"
        f"{value}</td>"
        "</tr>"
    )


def _auto_selected_html(setting: str, training_progress, link_when_resolved: bool) -> str:
    """Describe a trainer setting that was left on "auto".

    ``setting`` is the config key (e.g. ``"batch_size"``). When
    ``training_progress`` is available, the value stored there under the same
    key is shown (or ``auto`` if it is missing/falsy); otherwise a generic
    explanation is returned.
    """
    docs_link = (
        "See <a href='https://ludwig.ai/latest/configuration/trainer/"
        "#trainer-parameters' target='_blank'>"
        "Ludwig Trainer Parameters</a> for details."
    )
    if not training_progress:
        return (
            "Auto-selected by Ludwig<br>"
            "<span style='font-size: 0.85em;'>"
            "Automatically tuned based on architecture and dataset.<br>"
            f"{docs_link}"
            "</span>"
        )
    resolved = training_progress.get(setting)
    return (
        f"Auto-selected {setting.replace('_', ' ')} by Ludwig:<br>"
        "<span style='font-size: 0.85em;'>"
        f"{resolved if resolved else 'auto'}</span><br>"
        "<span style='font-size: 0.85em;'>"
        "Based on model architecture and training setup "
        "(e.g., fine-tuning).<br>"
        f"{docs_link if link_when_resolved else ''}"
        "</span>"
    )


def format_config_table_html(
    config: dict,
    split_info: Optional[str] = None,
    training_progress: Optional[dict] = None,
    output_type: Optional[str] = None,
) -> str:
    """Build the "Model and Training Summary" section of the report.

    Args:
        config: Run configuration. Only a fixed list of keys is displayed
            (task type, model name, epochs, batch size, ...), plus the
            ``augmentation`` entry when present.
        split_info: Optional text shown in a "Data Split" row.
        training_progress: Optional mapping read for the resolved
            ``batch_size`` / ``learning_rate`` and the last ``epoch``.
        output_type: The threshold row is only shown when this is ``"binary"``.

    Returns:
        An HTML fragment with the heading, the two-column parameter table and
        a footer paragraph linking to the Ludwig documentation.
    """
    display_keys = (
        "task_type",
        "model_name",
        "epochs",
        "batch_size",
        "fine_tune",
        "use_pretrained",
        "learning_rate",
        "random_seed",
        "early_stop",
        "threshold",
    )

    rows = []
    for key in display_keys:
        val = config.get(key)

        if key == "threshold":
            if output_type != "binary":
                continue
            val = 0.5 if val is None else val
            val_str = f"{val:.2f}"
            if val == 0.5:
                val_str += " (default)"
        elif key == "task_type":
            val_str = val.title() if isinstance(val, str) else "N/A"
        elif key == "batch_size":
            # Test for "auto" BEFORE converting: int("auto") would raise, and
            # the explanation must end up in val_str to be rendered at all.
            if val is None or val == "auto":
                val_str = _auto_selected_html("batch_size", training_progress, True)
            else:
                val_str = int(val)
        elif key == "learning_rate":
            if val is None or val == "auto":
                val_str = _auto_selected_html("learning_rate", training_progress, False)
            else:
                val_str = f"{val:.6f}"
        elif key == "epochs":
            if val is None:
                val_str = "N/A"
            elif (
                training_progress
                and "epoch" in training_progress
                and val > training_progress["epoch"]
            ):
                # Fewer epochs were run than configured.
                val_str = (
                    "Because of early stopping: the training "
                    f"stopped at epoch {training_progress['epoch']}"
                )
            else:
                val_str = val
        else:
            val_str = val if val is not None else "N/A"

        # Unset parameters are hidden, except the task type which is always listed.
        if val_str == "N/A" and key != "task_type":
            continue
        rows.append(_config_table_row(key.replace("_", " ").title(), val_str))

    aug_cfg = config.get("augmentation")
    if aug_cfg:
        aug_val = ", ".join(str(a.get("type", "")) for a in aug_cfg)
        rows.append(_config_table_row("Augmentation", aug_val))

    if split_info:
        rows.append(_config_table_row("Data Split", split_info))

    rows_html = "".join(rows)
    summary_html = f"""
    <h2 style="text-align: center;">Model and Training Summary</h2>
    <div style="display: flex; justify-content: center;">
      <table style="border-collapse: collapse; width: 100%; table-layout: fixed;">
        <thead><tr>
          <th style="padding: 10px; border: 1px solid #ccc; text-align: left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Parameter</th>
          <th style="padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">Value</th>
        </tr></thead>
        <tbody>
          {rows_html}
        </tbody>
      </table>
    </div><br>
    <p style="text-align: center; font-size: 0.9em;">
      Model trained using <a href="https://ludwig.ai/" target="_blank" rel="noopener noreferrer">Ludwig</a>.
      <a href="https://ludwig.ai/latest/configuration/" target="_blank" rel="noopener noreferrer">
        Ludwig documentation provides detailed information about default model and training parameters
      </a>
    </p><hr>
    """
    return summary_html
| 178 | |
| 179 | |
def get_html_template() -> str:
    """
    Return the opening part of the HTML report as a single string.

    The markup contains the doctype, the complete <head> (inline CSS and
    JavaScript) and the opening <body> and <div class="container"> tags.
    Both of those elements are left open: the caller appends the report
    content and then the matching closing tags.

    Includes:
      - Base styling for layout, tables and plots
      - Sortable headers for `table.performance-summary th.sortable`; each
        click cycles through three states (none ⇅, asc ↑, desc ↓), where
        "none" restores the original row order. Cells are compared
        numerically when both parse as numbers, otherwise as text.
      - A scroll helper class (.scroll-rows-30) that approximates ~30 visible rows
      - Styles for the tab bar, the Help button and the metrics help modal
      - A script guarded by `window.__perfSummarySortInit` so the sorting
        setup runs only once even if this block is injected twice
    """
    return """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Galaxy-Ludwig Report</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f4f4f4;
        }
        .container {
            max-width: 1200px;
            margin: auto;
            background: white;
            padding: 20px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            overflow-x: auto;
        }
        h1 {
            text-align: center;
            color: #333;
        }
        h2 {
            border-bottom: 2px solid #4CAF50;
            color: #4CAF50;
            padding-bottom: 5px;
            margin-top: 28px;
        }

        /* baseline table setup */
        table {
            border-collapse: collapse;
            margin: 20px 0;
            width: 100%;
            table-layout: fixed;
            background: #fff;
        }
        table, th, td {
            border: 1px solid #ddd;
        }
        th, td {
            padding: 10px;
            text-align: center;
            vertical-align: middle;
            word-break: break-word;
            white-space: normal;
            overflow-wrap: anywhere;
        }
        th {
            background-color: #4CAF50;
            color: white;
        }

        .plot {
            text-align: center;
            margin: 20px 0;
        }
        .plot img {
            max-width: 100%;
            height: auto;
            border: 1px solid #ddd;
        }

        /* -------------------
        sortable columns (3-state: none ⇅, asc ↑, desc ↓)
        ------------------- */
        table.performance-summary th.sortable {
            cursor: pointer;
            position: relative;
            user-select: none;
        }
        /* default icon space */
        table.performance-summary th.sortable::after {
            content: '⇅';
            position: absolute;
            right: 12px;
            top: 50%;
            transform: translateY(-50%);
            font-size: 0.8em;
            color: #eaf5ea; /* light on green */
            text-shadow: 0 0 1px rgba(0,0,0,0.15);
        }
        /* three states override the default */
        table.performance-summary th.sortable.sorted-none::after { content: '⇅'; color: #eaf5ea; }
        table.performance-summary th.sortable.sorted-asc::after { content: '↑'; color: #ffffff; }
        table.performance-summary th.sortable.sorted-desc::after { content: '↓'; color: #ffffff; }

        /* show ~30 rows with a scrollbar (tweak if you want) */
        .scroll-rows-30 {
            max-height: 900px; /* ~30 rows depending on row height */
            overflow-y: auto; /* vertical scrollbar ("sidebar") */
            overflow-x: auto;
        }

        /* Tabs + Help button (used by build_tabbed_html) */
        .tabs {
            display: flex;
            align-items: center;
            border-bottom: 2px solid #ccc;
            margin-bottom: 1rem;
            gap: 6px;
            flex-wrap: wrap;
        }
        .tab {
            padding: 10px 20px;
            cursor: pointer;
            border: 1px solid #ccc;
            border-bottom: none;
            background: #f9f9f9;
            margin-right: 5px;
            border-top-left-radius: 8px;
            border-top-right-radius: 8px;
        }
        .tab.active {
            background: white;
            font-weight: bold;
        }
        .help-btn {
            margin-left: auto;
            padding: 6px 12px;
            font-size: 0.9rem;
            border: 1px solid #4CAF50;
            border-radius: 4px;
            background: #4CAF50;
            color: white;
            cursor: pointer;
        }
        .tab-content {
            display: none;
            padding: 20px;
            border: 1px solid #ccc;
            border-top: none;
            background: #fff;
        }
        .tab-content.active {
            display: block;
        }

        /* Modal (used by get_metrics_help_modal) */
        .modal {
            display: none;
            position: fixed;
            z-index: 9999;
            left: 0; top: 0;
            width: 100%; height: 100%;
            overflow: auto;
            background-color: rgba(0,0,0,0.4);
        }
        .modal-content {
            background-color: #fefefe;
            margin: 8% auto;
            padding: 20px;
            border: 1px solid #888;
            width: 90%;
            max-width: 900px;
            border-radius: 8px;
        }
        .modal .close {
            color: #777;
            float: right;
            font-size: 28px;
            font-weight: bold;
            line-height: 1;
            margin-left: 8px;
        }
        .modal .close:hover,
        .modal .close:focus {
            color: black;
            text-decoration: none;
            cursor: pointer;
        }
        .metrics-guide h3 { margin-top: 20px; }
        .metrics-guide p { margin: 6px 0; }
        .metrics-guide ul { margin: 10px 0; padding-left: 20px; }
    </style>

    <script>
        // Guard to avoid double-initialization if this block is included twice
        (function(){
            if (window.__perfSummarySortInit) return;
            window.__perfSummarySortInit = true;

            function initPerfSummarySorting() {
                // Record original order for "back to original"
                document.querySelectorAll('table.performance-summary tbody').forEach(tbody => {
                    Array.from(tbody.rows).forEach((row, i) => { row.dataset.originalOrder = i; });
                });

                const getText = td => (td?.innerText || '').trim();
                const cmp = (idx, asc) => (a, b) => {
                    const v1 = getText(a.children[idx]);
                    const v2 = getText(b.children[idx]);
                    const n1 = parseFloat(v1), n2 = parseFloat(v2);
                    if (!isNaN(n1) && !isNaN(n2)) return asc ? n1 - n2 : n2 - n1; // numeric
                    return asc ? v1.localeCompare(v2) : v2.localeCompare(v1); // lexical
                };

                document.querySelectorAll('table.performance-summary th.sortable').forEach(th => {
                    // initialize to "none"
                    th.classList.remove('sorted-asc','sorted-desc');
                    th.classList.add('sorted-none');

                    th.addEventListener('click', () => {
                        const table = th.closest('table');
                        const headerRow = th.parentNode;
                        const allTh = headerRow.querySelectorAll('th.sortable');
                        const tbody = table.querySelector('tbody');

                        // Determine current state BEFORE clearing
                        const isAsc = th.classList.contains('sorted-asc');
                        const isDesc = th.classList.contains('sorted-desc');

                        // Reset all headers in this row
                        allTh.forEach(x => x.classList.remove('sorted-asc','sorted-desc','sorted-none'));

                        // Compute next state
                        let next;
                        if (!isAsc && !isDesc) {
                            next = 'asc';
                        } else if (isAsc) {
                            next = 'desc';
                        } else {
                            next = 'none';
                        }
                        th.classList.add('sorted-' + next);

                        // Sort rows according to the chosen state
                        const rows = Array.from(tbody.rows);
                        if (next === 'none') {
                            rows.sort((a, b) => (a.dataset.originalOrder - b.dataset.originalOrder));
                        } else {
                            const idx = Array.from(headerRow.children).indexOf(th);
                            rows.sort(cmp(idx, next === 'asc'));
                        }
                        rows.forEach(r => tbody.appendChild(r));
                    });
                });
            }

            // Run after DOM is ready
            if (document.readyState === 'loading') {
                document.addEventListener('DOMContentLoaded', initPerfSummarySorting);
            } else {
                initPerfSummarySorting();
            }
        })();
    </script>
</head>
<body>
    <div class="container">
"""
| 443 | |
| 444 | |
def get_html_closing():
    """Return the tags that end the report: closes .container, <body> and <html>."""
    closing_tags = ("</div>", "</body>", "</html>")
    # One tag per line, with a newline before the first and after the last.
    return "\n" + "\n".join(closing_tags) + "\n"
| 452 | |
| 453 | |
def encode_image_to_base64(image_path: str) -> str:
    """Read the file at ``image_path`` in binary mode and return it as base64 text."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
| 458 | |
| 459 | |
def json_to_nested_html_table(json_data, depth: int = 0) -> str:
    """
    Convert a JSON-able object to an HTML nested table.

    Dicts become two-column tables (key/value) and lists become index/value
    rows. A container holding only primitive values is rendered flat
    (``Index N`` labels for lists); a container with nested dicts/lists is
    rendered recursively (``[N]`` labels for lists) with header cells indented
    by ``depth * 20`` pixels.

    Keys and primitive values are HTML-escaped (``&``, ``<``, ``>``) so that
    text found in the JSON cannot break or inject markup into the report.

    Args:
        json_data: A dict, list or primitive value.
        depth: Current nesting level; callers normally leave it at 0.

    Returns:
        A ``<table>`` fragment for dicts/lists, or the escaped text of a primitive.
    """

    def _text(value) -> str:
        return html.escape(str(value), quote=False)

    if isinstance(json_data, dict):
        is_list = False
        entries = list(json_data.items())
    elif isinstance(json_data, list):
        is_list = True
        entries = list(enumerate(json_data))
    else:
        # Primitive
        return _text(json_data)

    is_flat = all(not isinstance(value, (dict, list)) for _, value in entries)

    rows = []
    for label, value in entries:
        if is_flat:
            header = f"Index {label}" if is_list else _text(label)
            rows.append(f"<tr><th>{header}</th><td>{_text(value)}</td></tr>")
        else:
            header = f"[{label}]" if is_list else _text(label)
            rows.append(
                f"<tr><th style='text-align:left;padding-left:{depth * 20}px;'>{header}</th>"
                f"<td>{json_to_nested_html_table(value, depth + 1)}</td></tr>"
            )
    return f"<table>{''.join(rows)}</table>"
| 508 | |
| 509 | |
def json_to_html_table(json_data) -> str:
    """
    Render JSON as a vertically oriented HTML table.

    A ``str`` argument is parsed with ``json.loads`` first; any other value is
    handed to ``json_to_nested_html_table`` unchanged.
    """
    parsed = json.loads(json_data) if isinstance(json_data, str) else json_data
    return json_to_nested_html_table(parsed)
| 517 | |
| 518 | |
def build_tabbed_html(metrics_html: str, train_val_html: str, test_html: str) -> str:
    """
    Build a 3-tab interface:
      - Config and Results Summary
      - Train/Validation Results
      - Test Results
    Includes a persistent "Help" button that toggles the metrics modal.

    Each argument is inserted verbatim into its tab panel (element ids
    ``metrics``, ``trainval`` and ``test``); the first panel starts active.
    The fragment also defines the global JavaScript function ``showTab(id)``,
    which activates the panel with that id and the tab whose ``onclick``
    attribute contains the id. The Help button only carries the id
    ``openMetricsHelp``; no click handler is attached to it here.
    """
    # Doubled braces ({{ }}) are literal braces of the embedded JavaScript;
    # single braces are f-string placeholders.
    return f"""
<div class="tabs">
  <div class="tab active" onclick="showTab('metrics')">Config and Results Summary</div>
  <div class="tab" onclick="showTab('trainval')">Train/Validation Results</div>
  <div class="tab" onclick="showTab('test')">Test Results</div>
  <button id="openMetricsHelp" class="help-btn" title="Open metrics help">Help</button>
</div>

<div id="metrics" class="tab-content active">
  {metrics_html}
</div>
<div id="trainval" class="tab-content">
  {train_val_html}
</div>
<div id="test" class="tab-content">
  {test_html}
</div>

<script>
  function showTab(id) {{
    document.querySelectorAll('.tab-content').forEach(el => el.classList.remove('active'));
    document.querySelectorAll('.tab').forEach(el => el.classList.remove('active'));
    document.getElementById(id).classList.add('active');
    // find tab with matching onclick target
    document.querySelectorAll('.tab').forEach(t => {{
      if (t.getAttribute('onclick') && t.getAttribute('onclick').includes(id)) {{
        t.classList.add('active');
      }}
    }});
  }}
</script>
"""
| 559 | |
| 560 | |
def get_metrics_help_modal() -> str:
    """
    Returns a ready-to-use modal with a comprehensive metrics guide and
    the small script that wires the "Help" button to open/close the modal.

    The result is the modal markup (element id ``metricsHelpModal``)
    followed by a ``<script>`` element. On ``DOMContentLoaded`` the script
    looks up the modal and the element with id ``openMetricsHelp``: clicking
    that element shows the modal, while clicking the modal's ``.close`` span
    or the modal backdrop itself hides it again.
    """
    # Static help text: ten numbered sections, from general metrics to recommendations.
    modal_html = (
        '<div id="metricsHelpModal" class="modal">'
        ' <div class="modal-content">'
        ' <span class="close">×</span>'
        " <h2>Model Evaluation Metrics — Help Guide</h2>"
        ' <div class="metrics-guide">'
        ' <h3>1) General Metrics (Regression and Classification)</h3>'
        ' <p><strong>Loss (Regression & Classification):</strong> '
        'Measures the difference between predicted and actual values, '
        'optimized during training. Lower is better. '
        'For regression, this is often Mean Squared Error (MSE) or '
        'Mean Absolute Error (MAE). For classification, it\'s typically '
        'cross-entropy or log loss.</p>'
        ' <h3>2) Regression Metrics</h3>'
        ' <p><strong>Mean Absolute Error (MAE):</strong> '
        'Average of absolute differences between predicted and actual values, '
        'in the same units as the target. Use for interpretable error measurement '
        'when all errors are equally important. Less sensitive to outliers than MSE.</p>'
        ' <p><strong>Mean Squared Error (MSE):</strong> '
        'Average of squared differences between predicted and actual values. '
        'Penalizes larger errors more heavily, useful when large deviations are critical. '
        'Often used as the loss function in regression.</p>'
        ' <p><strong>Root Mean Squared Error (RMSE):</strong> '
        'Square root of MSE, in the same units as the target. '
        'Balances interpretability and sensitivity to large errors. '
        'Widely used for regression evaluation.</p>'
        ' <p><strong>Mean Absolute Percentage Error (MAPE):</strong> '
        'Average absolute error as a percentage of actual values. '
        'Scale-independent, ideal for comparing relative errors across datasets. '
        'Avoid when actual values are near zero.</p>'
        ' <p><strong>Root Mean Squared Percentage Error (RMSPE):</strong> '
        'Square root of mean squared percentage error. Scale-independent, '
        'penalizes larger relative errors more than MAPE. Use for forecasting '
        'or when relative accuracy matters.</p>'
        ' <p><strong>R² Score:</strong> Proportion of variance in the target '
        'explained by the model. Ranges from negative infinity to 1 (perfect prediction). '
        'Use to assess model fit; negative values indicate poor performance '
        'compared to predicting the mean.</p>'
        ' <h3>3) Classification Metrics</h3>'
        ' <p><strong>Accuracy:</strong> Proportion of correct predictions '
        'among all predictions. Simple but misleading for imbalanced datasets, '
        'where high accuracy may hide poor performance on minority classes.</p>'
        ' <p><strong>Micro Accuracy:</strong> Sums true positives and true negatives '
        'across all classes before computing accuracy. Suitable for multiclass or '
        'multilabel problems with imbalanced data.</p>'
        ' <p><strong>Token Accuracy:</strong> Measures how often predicted tokens '
        '(e.g., in sequences) match true tokens. Common in NLP tasks like text generation '
        'or token classification.</p>'
        ' <p><strong>Precision:</strong> Proportion of positive predictions that are '
        'correct (TP / (TP + FP)). Use when false positives are costly, e.g., spam detection.</p>'
        ' <p><strong>Recall (Sensitivity):</strong> Proportion of actual positives '
        'correctly predicted (TP / (TP + FN)). Use when missing positives is risky, '
        'e.g., disease detection.</p>'
        ' <p><strong>Specificity:</strong> True negative rate (TN / (TN + FP)). '
        'Measures ability to identify negatives. Useful in medical testing to avoid '
        'false alarms.</p>'
        ' <h3>4) Classification: Macro, Micro, and Weighted Averages</h3>'
        ' <p><strong>Macro Precision / Recall / F1:</strong> Averages the metric '
        'across all classes, treating each equally. Best for balanced datasets where '
        'all classes are equally important.</p>'
        ' <p><strong>Micro Precision / Recall / F1:</strong> Aggregates true positives, '
        'false positives, and false negatives across all classes before computing. '
        'Ideal for imbalanced or multilabel classification.</p>'
        ' <p><strong>Weighted Precision / Recall / F1:</strong> Averages metrics '
        'across classes, weighted by the number of true instances per class. Balances '
        'class importance based on frequency.</p>'
        ' <h3>5) Classification: Average Precision (PR-AUC Variants)</h3>'
        ' <p><strong>Average Precision Macro:</strong> Precision-Recall AUC averaged '
        'equally across classes. Use for balanced multiclass problems.</p>'
        ' <p><strong>Average Precision Micro:</strong> Global Precision-Recall AUC '
        'using all instances. Best for imbalanced or multilabel classification.</p>'
        ' <p><strong>Average Precision Samples:</strong> Precision-Recall AUC averaged '
        'across individual samples. Ideal for multilabel tasks where samples have multiple '
        'labels.</p>'
        ' <h3>6) Classification: ROC-AUC Variants</h3>'
        ' <p><strong>ROC-AUC:</strong> Measures ability to distinguish between classes. '
        'AUC = 1 is perfect; 0.5 is random guessing. Use for binary classification.</p>'
        ' <p><strong>Macro ROC-AUC:</strong> Averages AUC across all classes equally. '
        'Suitable for balanced multiclass problems.</p>'
        ' <p><strong>Micro ROC-AUC:</strong> Computes AUC from aggregated predictions '
        'across all classes. Useful for imbalanced or multilabel settings.</p>'
        ' <h3>7) Classification: Confusion Matrix Stats (Per Class)</h3>'
        ' <p><strong>True Positives / Negatives (TP / TN):</strong> Correct predictions '
        'for positives and negatives, respectively.</p>'
        ' <p><strong>False Positives / Negatives (FP / FN):</strong> Incorrect predictions '
        '— false alarms and missed detections.</p>'
        ' <h3>8) Classification: Ranking Metrics</h3>'
        ' <p><strong>Hits at K:</strong> Measures whether the true label is among the '
        'top-K predictions. Common in recommendation systems and retrieval tasks.</p>'
        ' <h3>9) Other Metrics (Classification)</h3>'
        ' <p><strong>Cohen\'s Kappa:</strong> Measures agreement between predicted and '
        'actual labels, adjusted for chance. Useful for multiclass classification with '
        'imbalanced data.</p>'
        ' <p><strong>Matthews Correlation Coefficient (MCC):</strong> Balanced measure '
        'using TP, TN, FP, and FN. Effective for imbalanced datasets.</p>'
        ' <h3>10) Metric Recommendations</h3>'
        ' <ul>'
        ' <li><strong>Regression:</strong> Use <strong>RMSE</strong> or '
        '<strong>MAE</strong> for general evaluation, <strong>MAPE</strong> for relative '
        'errors, and <strong>R²</strong> to assess model fit. Use <strong>MSE</strong> or '
        '<strong>RMSPE</strong> when large errors are critical.</li>'
        ' <li><strong>Classification (Balanced Data):</strong> Use <strong>Accuracy</strong> '
        'and <strong>F1</strong> for overall performance.</li>'
        ' <li><strong>Classification (Imbalanced Data):</strong> Use <strong>Precision</strong>, '
        '<strong>Recall</strong>, and <strong>ROC-AUC</strong> to focus on minority class '
        'performance.</li>'
        ' <li><strong>Multilabel or Imbalanced Classification:</strong> Use '
        '<strong>Micro Precision/Recall/F1</strong> or <strong>Micro ROC-AUC</strong>.</li>'
        ' <li><strong>Balanced Multiclass:</strong> Use <strong>Macro Precision/Recall/F1</strong> '
        'or <strong>Macro ROC-AUC</strong>.</li>'
        ' <li><strong>Class Frequency Matters:</strong> Use <strong>Weighted Precision/Recall/F1</strong> '
        'to account for class imbalance.</li>'
        ' <li><strong>Recommendation/Ranking:</strong> Use <strong>Hits at K</strong> for retrieval tasks.</li>'
        ' <li><strong>Detailed Analysis:</strong> Use <strong>Confusion Matrix stats</strong> '
        'for class-wise performance in classification.</li>'
        ' </ul>'
        ' </div>'
        ' </div>'
        '</div>'
    )

    # Wiring script: every lookup is null-checked, so a missing button or
    # modal simply leaves the corresponding handler unattached.
    modal_js = (
        "<script>"
        "document.addEventListener('DOMContentLoaded', function() {"
        " var modal = document.getElementById('metricsHelpModal');"
        " var openBtn = document.getElementById('openMetricsHelp');"
        " var closeBtn = modal ? modal.querySelector('.close') : null;"
        " if (openBtn && modal) {"
        " openBtn.addEventListener('click', function(){ modal.style.display = 'block'; });"
        " }"
        " if (closeBtn && modal) {"
        " closeBtn.addEventListener('click', function(){ modal.style.display = 'none'; });"
        " }"
        " window.addEventListener('click', function(ev){"
        " if (ev.target === modal) { modal.style.display = 'none'; }"
        " });"
        "});"
        "</script>"
    )
    return modal_html + modal_js
| 706 | |
| 707 # ----------------------------------------- | |
| 708 # MODEL PERFORMANCE (Train/Val/Test) TABLE | |
| 709 # ----------------------------------------- | |
| 710 | |
| 711 | |
def _metric_display_name(metric_key: str) -> str:
    """Return the configured display name, or a title-cased version of the key."""
    return METRIC_DISPLAY_NAMES.get(metric_key, metric_key.replace("_", " ").title())


def _format_metric_value(value) -> str:
    """Format a metric with four decimals; non-numeric values fall back to ``str``."""
    try:
        return f"{value:.4f}"
    except (TypeError, ValueError):
        # Strings, lists, dicts, ... do not support the ".4f" format spec.
        return str(value)


def _collect_metric_rows(*splits) -> list:
    """Build ``[display name, value, ...]`` rows, one value per split.

    Metrics are taken from the first split in sorted key order and kept only
    when every split has a non-``None`` value for them.
    """
    rows = []
    for metric_key in sorted(splits[0]):
        values = [split.get(metric_key) for split in splits]
        if any(value is None for value in values):
            continue
        rows.append(
            [_metric_display_name(metric_key), *map(_format_metric_value, values)]
        )
    return rows


def _performance_table_html(title: str, value_columns, rows, empty_message: str) -> str:
    """Render a sortable ``performance-summary`` table, or a one-cell notice if empty."""
    if not rows:
        return f"<table><tr><td>{empty_message}</td></tr></table>"

    header_cells = [
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; "
        "text-align: left; white-space: nowrap;'>Metric</th>"
    ]
    header_cells.extend(
        "<th class='sortable' style='padding: 10px; border: 1px solid #ccc; "
        f"text-align: center; white-space: nowrap;'>{column}</th>"
        for column in value_columns
    )
    body_rows = [
        generate_table_row(
            row,
            "padding: 10px; border: 1px solid #ccc; text-align: center; white-space: nowrap;",
        )
        for row in rows
    ]
    return (
        f"<h2 style='text-align: center;'>{title}</h2>"
        "<div style='display: flex; justify-content: center;'>"
        "<table class='performance-summary' style='border-collapse: collapse;'>"
        "<thead><tr>"
        + "".join(header_cells)
        + "</tr></thead><tbody>"
        + "".join(body_rows)
        + "</tbody></table></div><br>"
    )


def format_stats_table_html(train_stats: dict, test_stats: dict, output_type: str) -> str:
    """Format a combined HTML table for training, validation, and test metrics.

    Only metrics with a value in all three splits are listed. A split that is
    missing from the extracted metrics is treated as empty.
    """
    all_metrics = extract_metrics_from_json(train_stats, test_stats, output_type)
    rows = _collect_metric_rows(
        all_metrics.get("training") or {},
        all_metrics.get("validation") or {},
        all_metrics.get("test") or {},
    )
    return _performance_table_html(
        "Model Performance Summary",
        ("Train", "Validation", "Test"),
        rows,
        "No metric values found.",
    )

# -------------------------------------------
# TRAIN/VALIDATION PERFORMANCE SUMMARY TABLE
# -------------------------------------------


def format_train_val_stats_table_html(train_stats: dict, test_stats: dict) -> str:
    """Format train/validation metrics into an HTML table.

    The output type passed to the metric extraction is detected from
    ``test_stats``. Only metrics with a value in both splits are listed.
    """
    all_metrics = extract_metrics_from_json(
        train_stats, test_stats, detect_output_type(test_stats)
    )
    rows = _collect_metric_rows(
        all_metrics.get("training") or {},
        all_metrics.get("validation") or {},
    )
    return _performance_table_html(
        "Train/Validation Performance Summary",
        ("Train", "Validation"),
        rows,
        "No metric values found for Train/Validation.",
    )

# -----------------------------------------
# TEST-ONLY PERFORMANCE SUMMARY TABLE
# -----------------------------------------


def format_test_merged_stats_table_html(
    test_metrics: Dict[str, Any], output_type: str
) -> str:
    """Format test metrics into an HTML table.

    Entries whose value is ``None`` are omitted. ``output_type`` is accepted
    for interface compatibility and is not used by this function.
    """
    return _performance_table_html(
        "Test Performance Summary",
        ("Test",),
        _collect_metric_rows(test_metrics),
        "No test metric values found.",
    )
| 828 return html |
