# report_utils.py @ 0:375c36923da1 (draft, default, tip)
# planemo upload for repository https://github.com/goeckslab/gleam.git
# commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
# author: goeckslab | date: Tue, 09 Dec 2025 23:49:47 +0000
import base64
import html
import json
import logging
import os
import platform
import shutil
import sys
import tempfile
from datetime import datetime
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
import yaml
from utils import verify_outputs

logger = logging.getLogger(__name__)


def _escape(s: Any) -> str:
    return html.escape(str(s))


def _write_predictor_path(predictor):
    try:
        pred_path = getattr(predictor, "path", None)
        if pred_path:
            with open("predictor_path.txt", "w") as pf:
                pf.write(str(pred_path))
            logger.info("Wrote predictor path → predictor_path.txt")
        return pred_path
    except Exception:
        logger.warning("Could not write predictor_path.txt")
        return None

def _copy_config_if_available(pred_path: Optional[str], output_config: Optional[str]):
    if not output_config:
        return
    try:
        config_yaml_path = os.path.join(pred_path, "config.yaml") if pred_path else None
        if config_yaml_path and os.path.isfile(config_yaml_path):
            shutil.copy2(config_yaml_path, output_config)
            logger.info(f"Wrote AutoGluon config → {output_config}")
        else:
            with open(output_config, "w") as cfg_out:
                cfg_out.write("# config.yaml not found for this run\n")
            logger.warning(f"AutoGluon config.yaml not found; created placeholder at {output_config}")
    except Exception as e:
        logger.error(f"Failed to write config output '{output_config}': {e}")
        try:
            with open(output_config, "w") as cfg_out:
                cfg_out.write(f"# Failed to copy config.yaml: {e}\n")
        except Exception:
            pass


def _load_config_yaml(args, predictor) -> dict:
    """
    Load config.yaml either from the predictor path or the exported output_config.
    """
    candidates = []
    pred_path = getattr(predictor, "path", None)
    if pred_path:
        cfg_path = os.path.join(pred_path, "config.yaml")
        if os.path.isfile(cfg_path):
            candidates.append(cfg_path)
    if args.output_config and os.path.isfile(args.output_config):
        candidates.append(args.output_config)

    for p in candidates:
        try:
            with open(p, "r") as f:
                return yaml.safe_load(f) or {}
        except Exception:
            continue
    return {}

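
# Hedged usage sketch (not called anywhere in this module): _load_config_yaml
# prefers the config.yaml inside the predictor directory and only then falls
# back to the exported output_config. The SimpleNamespace stand-ins below are
# illustrative assumptions, not real AutoGluon objects.
def _demo_load_config_yaml() -> dict:
    import types

    workdir = tempfile.mkdtemp()
    with open(os.path.join(workdir, "config.yaml"), "w") as f:
        yaml.safe_dump({"model": {"names": ["demo_backbone"]}}, f)
    fake_predictor = types.SimpleNamespace(path=workdir)
    fake_args = types.SimpleNamespace(output_config=None)
    # Returns {"model": {"names": ["demo_backbone"]}} read from the predictor dir.
    return _load_config_yaml(fake_args, fake_predictor)
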

def _summarize_config(cfg: dict, args) -> List[tuple[str, Any]]:
    """
    Build rows describing model components and key hyperparameters from a loaded
    config.yaml. (`args` is accepted for future CLI fallbacks but is not
    currently consulted.)
    """
    rows: List[tuple[str, Any]] = []
    model_cfg = cfg.get("model", {}) if isinstance(cfg, dict) else {}
    names = model_cfg.get("names") or []
    if names:
        rows.append(("Model components", ", ".join(names)))

    # Tabular backbone: the first component whose data_types include categorical/numerical
    tabular_val = "—"
    for k, v in model_cfg.items():
        if k in ("names", "hf_text", "timm_image"):
            continue
        if isinstance(v, dict) and "data_types" in v:
            dtypes = v.get("data_types") or []
            if any(t in ("categorical", "numerical") for t in dtypes):
                dt_str = ", ".join(dtypes) if dtypes else ""
                tabular_val = f"{k} ({dt_str})" if dt_str else k
                break
    rows.append(("Tabular backbone", tabular_val))

    image_val = model_cfg.get("timm_image", {}).get("checkpoint_name") or "—"
    rows.append(("Image backbone", image_val))

    text_val = model_cfg.get("hf_text", {}).get("checkpoint_name") or "—"
    rows.append(("Text backbone", text_val))

    fusion_val = "—"
    for k in model_cfg.keys():
        if str(k).startswith("fusion"):
            fusion_val = k
            break
    rows.append(("Fusion backbone", fusion_val))

    # Optimizer block
    optim_cfg = cfg.get("optim", {}) if isinstance(cfg, dict) else {}
    optim_map = [
        ("optim_type", "Optimizer"),
        ("lr", "Learning rate"),
        ("weight_decay", "Weight decay"),
        ("lr_decay", "LR decay"),
        ("max_epochs", "Max epochs"),
        ("max_steps", "Max steps"),
        ("patience", "Early-stop patience"),
        ("check_val_every_n_epoch", "Val check every N epochs"),
        ("top_k", "Top K checkpoints"),
        ("top_k_average_method", "Top K averaging"),
    ]
    for key, label in optim_map:
        if key in optim_cfg:
            rows.append((label, optim_cfg[key]))

    env_cfg = cfg.get("env", {}) if isinstance(cfg, dict) else {}
    if "batch_size" in env_cfg:
        rows.append(("Global batch size", env_cfg["batch_size"]))

    return rows

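
# Hedged usage sketch: the config fragment below is illustrative (the component
# and checkpoint names are assumptions, not taken from a real run) and shows the
# kind of rows _summarize_config produces.
def _demo_summarize_config() -> None:
    demo_cfg = {
        "model": {
            "names": ["ft_transformer", "timm_image", "fusion_mlp"],
            "ft_transformer": {"data_types": ["categorical", "numerical"]},
            "timm_image": {"checkpoint_name": "swin_base_patch4_window7_224"},
        },
        "optim": {"optim_type": "adamw", "lr": 1e-4},
        "env": {"batch_size": 128},
    }
    for label, value in _summarize_config(demo_cfg, args=None):
        # e.g. "Tabular backbone: ft_transformer (categorical, numerical)"
        print(f"{label}: {value}")
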

def write_outputs(
    args,
    predictor,
    problem_type: str,
    eval_results: dict,
    data_ctx: dict,
    raw_folds=None,
    ag_folds=None,
    raw_metrics_std=None,
    ag_by_split_std=None,
):
    """Write the JSON metrics file, the HTML report, the predictor path, and the config copy."""
    from plot_logic import (
        build_summary_html,
        build_test_html_and_plots,
        build_feature_html,
        assemble_full_html_report,
        build_train_html_and_plots,
    )
    from autogluon.multimodal import MultiModalPredictor
    from metrics_logic import aggregate_metrics

    raw_metrics = eval_results.get("raw_metrics", {})
    ag_by_split = eval_results.get("ag_eval", {})
    fit_summary_obj = eval_results.get("fit_summary")

    df_train = data_ctx.get("train")
    df_val = data_ctx.get("val")
    df_test_internal = data_ctx.get("test_internal")
    df_test_external = data_ctx.get("test_external")
    df_test = df_test_external if df_test_external is not None else df_test_internal
    df_train_full = df_train if df_val is None else pd.concat([df_train, df_val], ignore_index=True)

    # Aggregate folds if provided without stds
    if raw_folds and raw_metrics_std is None:
        raw_metrics, raw_metrics_std = aggregate_metrics(raw_folds)
    if ag_folds and ag_by_split_std is None:
        ag_by_split, ag_by_split_std = aggregate_metrics(ag_folds)

    # Inject AG eval into raw metrics for visibility
    def _inject_ag(src: dict, dst: dict):
        for k, v in (src or {}).items():
            try:
                dst[f"AG_{k}"] = float(v)
            except Exception:
                dst[f"AG_{k}"] = v

    if "Train" in raw_metrics and "Train" in ag_by_split:
        _inject_ag(ag_by_split["Train"], raw_metrics["Train"])
    if "Validation" in raw_metrics and "Validation" in ag_by_split:
        _inject_ag(ag_by_split["Validation"], raw_metrics["Validation"])
    if "Test" in raw_metrics and "Test" in ag_by_split:
        _inject_ag(ag_by_split["Test"], raw_metrics["Test"])

    # JSON
    with open(args.output_json, "w") as f:
        json.dump(
            {
                "train": raw_metrics.get("Train", {}),
                "val": raw_metrics.get("Validation", {}),
                "test": raw_metrics.get("Test", {}),
                "test_external": raw_metrics.get("Test (external)", {}),
                "ag_eval": ag_by_split,
                "ag_eval_std": ag_by_split_std,
                "fit_summary": fit_summary_obj,
                "problem_type": problem_type,
                "predictor_path": getattr(predictor, "path", None),
                "threshold": args.threshold,
                "threshold_test": args.threshold,
                "preset": args.preset,
                "eval_metric": args.eval_metric,
                "folds": {
                    "raw_folds": raw_folds,
                    "ag_folds": ag_folds,
                    "summary_mean": raw_metrics if raw_folds else None,
                    "summary_std": raw_metrics_std,
                    "ag_summary_mean": ag_by_split,
                    "ag_summary_std": ag_by_split_std,
                },
            },
            f,
            indent=2,
            default=str,
        )
    logger.info(f"Wrote full JSON → {args.output_json}")

    # HTML report assembly
    label_col = args.target_column

    class_balance_block_html = build_class_balance_html(
        df_train=df_train,
        label_col=label_col,
        df_val=df_val,
        df_test=df_test,
    )
    summary_perf_table_html = build_model_performance_summary_table(
        train_scores=raw_metrics.get("Train", {}),
        val_scores=raw_metrics.get("Validation", {}),
        test_scores=raw_metrics.get("Test", {}),
        include_test=True,
        title=None,
        show_title=False,
    )

    cfg_yaml = _load_config_yaml(args, predictor)
    config_rows = _summarize_config(cfg_yaml, args)
    threshold_rows = []
    if problem_type == "binary" and args.threshold is not None:
        threshold_rows.append(("Decision threshold (Test)", f"{float(args.threshold):.3f}"))
    extra_run_rows = [
        ("Target column", label_col),
        ("Model evaluation metric", args.eval_metric or "AutoGluon default"),
        ("Experiment quality", args.preset or "AutoGluon default"),
    ] + threshold_rows + config_rows

    summary_html = build_summary_html(
        predictor=predictor,
        df_train=df_train_full,
        df_val=df_val,
        df_test=df_test,
        label_column=label_col,
        extra_run_rows=extra_run_rows,
        class_balance_html=class_balance_block_html,
        perf_table_html=summary_perf_table_html,
    )

    train_tab_perf_html = build_model_performance_summary_table(
        train_scores=raw_metrics.get("Train", {}),
        val_scores=raw_metrics.get("Validation", {}),
        test_scores=raw_metrics.get("Test", {}),
        include_test=False,
        title=None,
        show_title=False,
    )

    train_html = build_train_html_and_plots(
        predictor=predictor,
        problem_type=problem_type,
        df_train=df_train,
        df_val=df_val,
        label_column=label_col,
        tmpdir=tempfile.mkdtemp(),
        seed=int(args.random_seed),
        perf_table_html=train_tab_perf_html,
        threshold=args.threshold,
    )

    test_html_template, plots = build_test_html_and_plots(
        predictor,
        problem_type,
        df_test,
        label_col,
        tempfile.mkdtemp(),
        threshold=args.threshold,
    )

    def _fmt_val(v):
        if isinstance(v, (int, np.integer)):
            return f"{int(v)}"
        if isinstance(v, (float, np.floating)):
            return f"{v:.6f}"
        return str(v)

    test_scores = raw_metrics.get("Test", {})
    # Drop the AutoGluon-injected ROC AUC line from the Test Performance Summary
    filtered_test_scores = {k: v for k, v in test_scores.items() if k != "AG_roc_auc"}
    metric_rows = "".join(
        f"<tr><td>{k.replace('_', ' ')}</td><td>{_fmt_val(v)}</td></tr>"
        for k, v in filtered_test_scores.items()
    )
    test_html_filled = test_html_template.format(metric_rows)

    is_multimodal = isinstance(predictor, MultiModalPredictor)
    leaderboard_html = "" if is_multimodal else build_leaderboard_html(predictor)
    inputs_html = ""
    ignored_features_html = "" if is_multimodal else build_ignored_features_html(predictor, df_train_full)
    presets_hparams_html = build_presets_hparams_html(predictor)
    notices: List[str] = []
    if args.threshold is not None and problem_type == "binary":
        notices.append(f"Using decision threshold = {float(args.threshold):.3f} on Test.")
    warnings_html = build_warnings_html([], notices)
    repro_html = build_reproducibility_html(args, {}, getattr(predictor, "path", None))

    transparency_blocks = "\n".join(
        [
            leaderboard_html,
            inputs_html,
            ignored_features_html,
            presets_hparams_html,
            warnings_html,
            repro_html,
        ]
    )

    try:
        feature_text = (
            build_feature_html(predictor, df_test, label_col, tempfile.mkdtemp(), args.random_seed)
            if df_test is not None
            else ""
        )
    except Exception:
        feature_text = "<p>Feature analysis unavailable for this model.</p>"

    full_html = assemble_full_html_report(
        summary_html,
        train_html,
        test_html_filled,
        plots,
        feature_text + transparency_blocks,
    )
    with open(args.output_html, "w") as f:
        f.write(full_html)
    logger.info(f"Wrote HTML report → {args.output_html}")

    pred_path = _write_predictor_path(predictor)
    _copy_config_if_available(pred_path, args.output_config)

    outputs_to_check = [
        (args.output_json, "JSON results"),
        (args.output_html, "HTML report"),
    ]
    if args.output_config:
        outputs_to_check.append((args.output_config, "AutoGluon config"))
    verify_outputs(outputs_to_check)

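
# Hedged sketch of the inputs write_outputs expects. The metric names and args
# attributes shown are illustrative assumptions inferred from the function body;
# the real values come from the calling pipeline (metrics_logic / plot_logic).
def _demo_write_outputs_inputs() -> tuple:
    eval_results = {
        "raw_metrics": {
            "Train": {"accuracy": 0.93},
            "Validation": {"accuracy": 0.90},
            "Test": {"accuracy": 0.88},
        },
        "ag_eval": {"Test": {"roc_auc": 0.91}},
        "fit_summary": None,
    }
    data_ctx = {
        "train": pd.DataFrame({"label": [0, 1]}),
        "val": None,
        "test_internal": pd.DataFrame({"label": [0, 1]}),
        "test_external": None,
    }
    # args additionally needs: output_json, output_html, output_config,
    # target_column, threshold, preset, eval_metric, random_seed
    return eval_results, data_ctx
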

def get_html_template() -> str:
    """
    Returns the opening HTML, <head> (with CSS/JS), and opens <body> + .container.
    Includes:
      - Base styling for layout and tables
      - Sortable table headers with 3-state arrows (none ⇅, asc ↑, desc ↓)
      - A scroll helper class (.scroll-rows-30) that approximates ~30 visible rows
      - A guarded script so initialization runs only once even if injected twice
    """
    return """
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>Galaxy-Ludwig Report</title>
  <style>
    body {
      font-family: Arial, sans-serif;
      margin: 0;
      padding: 20px;
      background-color: #f4f4f4;
    }
    .container {
      max-width: 1200px;
      margin: auto;
      background: white;
      padding: 20px;
      box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
      overflow-x: auto;
    }
    h1 {
      text-align: center;
      color: #333;
    }
    h2 {
      border-bottom: 2px solid #4CAF50;
      color: #4CAF50;
      padding-bottom: 5px;
      margin-top: 28px;
    }

    /* baseline table setup */
    table {
      border-collapse: collapse;
      margin: 20px 0;
      width: 100%;
      table-layout: fixed;
      background: #fff;
    }
    table, th, td {
      border: 1px solid #ddd;
    }
    th, td {
      padding: 10px;
      text-align: center;
      vertical-align: middle;
      word-break: break-word;
      white-space: normal;
      overflow-wrap: anywhere;
    }
    th {
      background-color: #4CAF50;
      color: white;
    }

    .plot {
      text-align: center;
      margin: 20px 0;
    }
    .plot img {
      max-width: 100%;
      height: auto;
      border: 1px solid #ddd;
    }

    /* -------------------
       sortable columns (3-state: none ⇅, asc ↑, desc ↓)
       ------------------- */
    table.performance-summary th.sortable {
      cursor: pointer;
      position: relative;
      user-select: none;
    }
    /* default icon space */
    table.performance-summary th.sortable::after {
      content: '⇅';
      position: absolute;
      right: 12px;
      top: 50%;
      transform: translateY(-50%);
      font-size: 0.8em;
      color: #eaf5ea; /* light on green */
      text-shadow: 0 0 1px rgba(0,0,0,0.15);
    }
    /* three states override the default */
    table.performance-summary th.sortable.sorted-none::after { content: '⇅'; color: #eaf5ea; }
    table.performance-summary th.sortable.sorted-asc::after  { content: '↑'; color: #ffffff; }
    table.performance-summary th.sortable.sorted-desc::after { content: '↓'; color: #ffffff; }

    /* show ~30 rows with a scrollbar (tweak if you want) */
    .scroll-rows-30 {
      max-height: 900px;   /* ~30 rows depending on row height */
      overflow-y: auto;    /* vertical scrollbar */
      overflow-x: auto;
    }

    /* Tabs + Help button (used by build_tabbed_html) */
    .tabs {
      display: flex;
      align-items: center;
      border-bottom: 2px solid #ccc;
      margin-bottom: 1rem;
      gap: 6px;
      flex-wrap: wrap;
    }
    .tab {
      padding: 10px 20px;
      cursor: pointer;
      border: 1px solid #ccc;
      border-bottom: none;
      background: #f9f9f9;
      margin-right: 5px;
      border-top-left-radius: 8px;
      border-top-right-radius: 8px;
    }
    .tab.active {
      background: white;
      font-weight: bold;
    }
    .help-btn {
      margin-left: auto;
      padding: 6px 12px;
      font-size: 0.9rem;
      border: 1px solid #4CAF50;
      border-radius: 4px;
      background: #4CAF50;
      color: white;
      cursor: pointer;
    }
    .tab-content {
      display: none;
      padding: 20px;
      border: 1px solid #ccc;
      border-top: none;
      background: #fff;
    }
    .tab-content.active {
      display: block;
    }

    /* Modal (used by get_metrics_help_modal) */
    .modal {
      display: none;
      position: fixed;
      z-index: 9999;
      left: 0; top: 0;
      width: 100%; height: 100%;
      overflow: auto;
      background-color: rgba(0,0,0,0.4);
    }
    .modal-content {
      background-color: #fefefe;
      margin: 8% auto;
      padding: 20px;
      border: 1px solid #888;
      width: 90%;
      max-width: 900px;
      border-radius: 8px;
    }
    .modal .close {
      color: #777;
      float: right;
      font-size: 28px;
      font-weight: bold;
      line-height: 1;
      margin-left: 8px;
    }
    .modal .close:hover,
    .modal .close:focus {
      color: black;
      text-decoration: none;
      cursor: pointer;
    }
    .metrics-guide h3 { margin-top: 20px; }
    .metrics-guide p { margin: 6px 0; }
    .metrics-guide ul { margin: 10px 0; padding-left: 20px; }
  </style>

  <script>
    // Guard to avoid double-initialization if this block is included twice
    (function(){
      if (window.__perfSummarySortInit) return;
      window.__perfSummarySortInit = true;

      function initPerfSummarySorting() {
        // Record original order for "back to original"
        document.querySelectorAll('table.performance-summary tbody').forEach(tbody => {
          Array.from(tbody.rows).forEach((row, i) => { row.dataset.originalOrder = i; });
        });

        const getText = td => (td?.innerText || '').trim();
        const cmp = (idx, asc) => (a, b) => {
          const v1 = getText(a.children[idx]);
          const v2 = getText(b.children[idx]);
          const n1 = parseFloat(v1), n2 = parseFloat(v2);
          if (!isNaN(n1) && !isNaN(n2)) return asc ? n1 - n2 : n2 - n1;  // numeric
          return asc ? v1.localeCompare(v2) : v2.localeCompare(v1);      // lexical
        };

        document.querySelectorAll('table.performance-summary th.sortable').forEach(th => {
          // initialize to "none"
          th.classList.remove('sorted-asc','sorted-desc');
          th.classList.add('sorted-none');

          th.addEventListener('click', () => {
            const table = th.closest('table');
            const headerRow = th.parentNode;
            const allTh = headerRow.querySelectorAll('th.sortable');
            const tbody = table.querySelector('tbody');

            // Determine current state BEFORE clearing
            const isAsc = th.classList.contains('sorted-asc');
            const isDesc = th.classList.contains('sorted-desc');

            // Reset all headers in this row
            allTh.forEach(x => x.classList.remove('sorted-asc','sorted-desc','sorted-none'));

            // Compute next state: none -> asc -> desc -> none
            let next;
            if (!isAsc && !isDesc) {
              next = 'asc';
            } else if (isAsc) {
              next = 'desc';
            } else {
              next = 'none';
            }
            th.classList.add('sorted-' + next);

            // Sort rows according to the chosen state
            const rows = Array.from(tbody.rows);
            if (next === 'none') {
              rows.sort((a, b) => (a.dataset.originalOrder - b.dataset.originalOrder));
            } else {
              const idx = Array.from(headerRow.children).indexOf(th);
              rows.sort(cmp(idx, next === 'asc'));
            }
            rows.forEach(r => tbody.appendChild(r));
          });
        });
      }

      // Run after DOM is ready
      if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', initPerfSummarySorting);
      } else {
        initPerfSummarySorting();
      }
    })();
  </script>
</head>
<body>
<div class="container">
"""


def get_html_closing():
    """Closes .container, body, and html."""
    return """
</div>
</body>
</html>
"""

def build_tabbed_html(
    summary_html: str,
    train_html: str,
    test_html: str,
    feature_html: str,
    explainer_html: Optional[str] = None,
) -> str:
    """
    Renders the tab headers, contents, and the JS needed to switch tabs.
    Optional tabs (feature/explainer) appear only when their content is non-empty.
    """
    tabs = [
        '<div class="tabs">',
        '<div class="tab active" onclick="showTab(\'summary\')">Model Metric Summary and Config</div>',
        '<div class="tab" onclick="showTab(\'train\')">Train and Validation Summary</div>',
        '<div class="tab" onclick="showTab(\'test\')">Test Summary</div>',
    ]
    if feature_html:
        tabs.append('<div class="tab" onclick="showTab(\'feature\')">Feature Importance</div>')
    if explainer_html:
        tabs.append('<div class="tab" onclick="showTab(\'explainer\')">Explainer Plots</div>')
    tabs.append('<button id="openMetricsHelp" class="help-btn">Help</button>')
    tabs.append('</div>')
    tabs_section = "\n".join(tabs)

    contents = [
        f'<div id="summary" class="tab-content active">{summary_html}</div>',
        f'<div id="train" class="tab-content">{train_html}</div>',
        f'<div id="test" class="tab-content">{test_html}</div>',
    ]
    if feature_html:
        contents.append(f'<div id="feature" class="tab-content">{feature_html}</div>')
    if explainer_html:
        contents.append(f'<div id="explainer" class="tab-content">{explainer_html}</div>')
    content_section = "\n".join(contents)

    js = """
<script>
function showTab(id) {
  document.querySelectorAll('.tab-content').forEach(el => el.classList.remove('active'));
  document.querySelectorAll('.tab').forEach(el => el.classList.remove('active'));
  document.getElementById(id).classList.add('active');
  document.querySelector(`.tab[onclick*="${id}"]`).classList.add('active');
}
</script>
"""
    return tabs_section + "\n" + content_section + "\n" + js

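
# Hedged usage sketch: how the template, tabbed body, and closing compose into a
# complete report page. The tab contents here are placeholder strings.
def _demo_assemble_page() -> str:
    body = build_tabbed_html(
        summary_html="<p>summary placeholder</p>",
        train_html="<p>train placeholder</p>",
        test_html="<p>test placeholder</p>",
        feature_html="<p>feature placeholder</p>",
    )
    return get_html_template() + "<h1>Demo Report</h1>\n" + body + get_html_closing()
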
def encode_image_to_base64(image_path: str) -> str:
    """
    Reads an image file from disk and returns a base64-encoded string
    for embedding directly in HTML <img> tags.
    """
    try:
        with open(image_path, "rb") as img_f:
            return base64.b64encode(img_f.read()).decode("utf-8")
    except Exception as e:
        logger.error(f"Failed to encode image '{image_path}': {e}")
        return ""

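
# Hedged usage sketch: embedding an encoded image in a report block. The file
# name is a placeholder, not a path this module produces.
def _demo_embed_image(image_path: str = "confusion_matrix.png") -> str:
    encoded = encode_image_to_base64(image_path)
    if not encoded:
        return "<p>Image unavailable.</p>"
    return f'<div class="plot"><img src="data:image/png;base64,{encoded}" /></div>'
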

def get_model_architecture(predictor: Any) -> str:
    """
    Returns a human-friendly description of the final model architecture based on the
    MultiModalPredictor configuration (e.g., timm_image=resnet50, hf_text=bert-base-uncased).
    """
    # MultiModalPredictor path: read backbones from config if available
    archs = []
    for attr in ("_config", "config"):
        cfg = getattr(predictor, attr, None)
        try:
            model_cfg = getattr(cfg, "model", None)
            if model_cfg:
                # OmegaConf-like mapping
                for name, sub in dict(model_cfg).items():
                    ck = None
                    # sub may be an object or a dict-like node
                    for k in ("checkpoint_name", "name", "model_name"):
                        try:
                            ck = getattr(sub, k)
                        except Exception:
                            ck = sub.get(k) if isinstance(sub, dict) else ck
                        if ck:
                            break
                    if ck:
                        archs.append(f"{name}={ck}")
        except Exception:
            continue

    if archs:
        return ", ".join(archs)

    # Fallback
    return type(predictor).__name__


def collect_run_context(args, predictor, problem_type: str,
                        df_train: pd.DataFrame, df_val: pd.DataFrame, df_test: pd.DataFrame,
                        warnings_list: List[str],
                        notes_list: List[str]) -> Dict[str, Any]:
    """Build a dictionary with run/system context for transparency."""
    # System info (best-effort; does not depend on AutoGluon stdout)
    try:
        import psutil  # optional dependency
        mem = psutil.virtual_memory()
        mem_total_gb = mem.total / (1024 ** 3)
        mem_avail_gb = mem.available / (1024 ** 3)
    except Exception:
        mem_total_gb = mem_avail_gb = None

    ctx = {
        "timestamp": datetime.now().isoformat(timespec="seconds"),
        "python_version": platform.python_version(),
        "platform": {
            "system": platform.system(),
            "release": platform.release(),
            "version": platform.version(),
            "machine": platform.machine(),
        },
        "cpu_count": os.cpu_count(),
        "memory_total_gb": mem_total_gb,
        "memory_available_gb": mem_avail_gb,
        "packages": {},
        "problem_type": problem_type,
        "label_column": args.label_column,
        "time_limit_sec": args.time_limit,
        "random_seed": args.random_seed,
        "splits": {
            "train_rows": int(len(df_train)),
            "val_rows": int(len(df_val)),
            "test_rows": int(len(df_test)),
            "n_features_raw": int(len(df_train.columns) - 1),  # minus label
        },
        "warnings": warnings_list,
        "notes": notes_list,
    }
    # Package versions (safe best-effort)
    for pkg_label, mod_name in [
        ("autogluon", "autogluon"),
        ("torch", "torch"),
        ("scikit_learn", "sklearn"),
        ("numpy", "numpy"),
        ("pandas", "pandas"),
    ]:
        try:
            mod = __import__(mod_name)
            ctx["packages"][pkg_label] = getattr(mod, "__version__", "unknown")
        except Exception:
            pass
    return ctx

def build_class_balance_html(
    df_train: Optional[pd.DataFrame],
    label_col: str,
    df_val: Optional[pd.DataFrame] = None,
    df_test: Optional[pd.DataFrame] = None,
) -> str:
    """
    Render label counts for each available split (Train/Validation/Test).
    """
    def _count_labels(frame: Optional[pd.DataFrame]) -> pd.Series:
        if frame is None or label_col not in frame:
            return pd.Series(dtype=int)
        series = frame[label_col]
        if series.dtype.kind in "ifu":
            return pd.Series(series).value_counts(dropna=False).sort_index()
        return pd.Series(series.astype(str)).value_counts(dropna=False)

    counts_train = _count_labels(df_train)
    counts_val = _count_labels(df_val)
    counts_test = _count_labels(df_test)

    labels: list[Any] = []
    for idx in (counts_train.index, counts_val.index, counts_test.index):
        for label in idx:
            if label not in labels:
                labels.append(label)

    has_train = df_train is not None
    has_val = df_val is not None
    has_test = df_test is not None

    def _fmt_count(counts: pd.Series, label: Any, enabled: bool) -> str:
        if not enabled:
            return "—"
        return str(int(counts.get(label, 0)))

    rows = [
        f"<tr><td>{_escape(label)}</td>"
        f"<td>{_fmt_count(counts_train, label, has_train)}</td>"
        f"<td>{_fmt_count(counts_val, label, has_val)}</td>"
        f"<td>{_fmt_count(counts_test, label, has_test)}</td></tr>"
        for label in labels
    ]

    if not rows:
        return "<p>No label distribution available.</p>"

    return f"""
<h3>Label Counts by Split</h3>
<table class="table">
  <thead><tr><th>Label</th><th>Train</th><th>Validation</th><th>Test</th></tr></thead>
  <tbody>
    {''.join(rows)}
  </tbody>
</table>
"""

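
# Hedged usage sketch: toy DataFrames showing the per-split label-count table.
def _demo_class_balance() -> str:
    df_tr = pd.DataFrame({"label": ["a", "a", "b"]})
    df_te = pd.DataFrame({"label": ["b", "b"]})
    # The Validation column renders as "—" because no validation split is supplied.
    return build_class_balance_html(df_tr, "label", df_val=None, df_test=df_te)
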

def build_leaderboard_html(predictor) -> str:
    try:
        lb = predictor.leaderboard(silent=True)
        # keep common helpful columns if present
        cols_pref = ["model", "score_val", "eval_metric", "pred_time_val", "fit_time",
                     "pred_time_val_marginal", "fit_time_marginal", "stack_level",
                     "can_infer", "fit_order"]
        cols = [c for c in cols_pref if c in lb.columns] or list(lb.columns)
        return "<h3>Model Leaderboard (Validation)</h3>" + lb[cols].to_html(index=False)
    except Exception as e:
        return f"<h3>Model Leaderboard</h3><p>Unavailable: {_escape(e)}</p>"


def build_ignored_features_html(predictor, df_any: pd.DataFrame) -> str:
    # MultiModalPredictor does not always expose .features(); guard accordingly.
    used = set()
    try:
        used = set(predictor.features())
    except Exception:
        # If we can't determine the used features, don't emit a misleading section
        return ""
    raw_cols = [c for c in df_any.columns if c != getattr(predictor, "label", None)]
    ignored = [c for c in raw_cols if c not in used]
    if not ignored:
        return ""
    items = "".join(f"<li>{html.escape(c)}</li>" for c in ignored)
    return f"""
<h3>Ignored / Unused Features</h3>
<p>The following columns were not used by the trained predictor at inference time:</p>
<ul>{items}</ul>
"""


def build_presets_hparams_html(predictor) -> str:
    # MultiModalPredictor path
    mm_hp = {}
    for attr in ("_config", "config", "_fit_args"):
        if hasattr(predictor, attr):
            try:
                val = getattr(predictor, attr)
                # make it JSON-ish
                mm_hp[attr] = str(val)
            except Exception:
                continue
    hp_html = f"<pre>{html.escape(json.dumps(mm_hp, indent=2))}</pre>" if mm_hp else "<i>Unavailable</i>"
    return (
        "<h3>Training Presets & Hyperparameters</h3>"
        f"<details open><summary>Show hyperparameters</summary>{hp_html}</details>"
    )


def build_warnings_html(warnings_list: List[str], notes_list: List[str]) -> str:
    if not warnings_list and not notes_list:
        return ""
    w_html = "".join(f"<li>{_escape(w)}</li>" for w in warnings_list)
    n_html = "".join(f"<li>{_escape(n)}</li>" for n in notes_list)
    return f"""
<h3>Warnings & Notices</h3>
{'<h4>Warnings</h4><ul>' + w_html + '</ul>' if warnings_list else ''}
{'<h4>Notices</h4><ul>' + n_html + '</ul>' if notes_list else ''}
"""


def build_reproducibility_html(args, ctx: Dict[str, Any], model_path: Optional[str]) -> str:
    cmd = " ".join(_escape(x) for x in sys.argv)
    load_snippet = ""
    if model_path:
        load_snippet = f"""<pre>
from autogluon.multimodal import MultiModalPredictor
predictor = MultiModalPredictor.load("{_escape(model_path)}")
</pre>"""
    pkg_rows = "".join(
        f"<tr><td>{_escape(k)}</td><td>{_escape(v)}</td></tr>"
        for k, v in (ctx.get("packages") or {}).items()
    )
    sys_table = f"""
<table class="table">
  <tbody>
    <tr><th>Timestamp</th><td>{_escape(ctx.get('timestamp'))}</td></tr>
    <tr><th>Python</th><td>{_escape(ctx.get('python_version'))}</td></tr>
    <tr><th>Platform</th><td>{_escape(ctx.get('platform'))}</td></tr>
    <tr><th>CPU Count</th><td>{_escape(ctx.get('cpu_count'))}</td></tr>
    <tr><th>Memory (GB)</th><td>Total: {_escape(ctx.get('memory_total_gb'))} | Avail: {_escape(ctx.get('memory_available_gb'))}</td></tr>
    <tr><th>Seed</th><td>{_escape(ctx.get('random_seed'))}</td></tr>
    <tr><th>Time Limit (s)</th><td>{_escape(ctx.get('time_limit_sec'))}</td></tr>
  </tbody>
</table>
"""
    pkgs_table = f"""
<h4>Package Versions</h4>
<table class="table">
  <thead><tr><th>Package</th><th>Version</th></tr></thead>
  <tbody>{pkg_rows}</tbody>
</table>
"""
    return f"""
<h3>Reproducibility</h3>
<h4>Command</h4>
<pre>{cmd}</pre>
{sys_table}
{pkgs_table}
<h4>Load Trained Model</h4>
{load_snippet or '<i>Model path not available</i>'}
"""


def build_modalities_html(predictor, df_any: pd.DataFrame, label_col: str, image_col: Optional[str]) -> str:
    """Summarize which inputs/modalities are used by the MultiModalPredictor."""
    cols = list(df_any.columns)
    # exclude the label from the feature list
    feat_cols = [c for c in cols if c != label_col]
    # identify image vs tabular columns from args / presence
    img_present = (image_col in df_any.columns) if image_col else False
    tab_cols = [c for c in feat_cols if c != image_col]

    # brief lists (avoid dumping all columns unless the list is small)
    def list_or_count(arr, max_show=20):
        if len(arr) <= max_show:
            items = "".join(f"<li>{html.escape(str(x))}</li>" for x in arr)
            return f"<ul>{items}</ul>"
        return f"<p>{len(arr)} columns</p>"

    img_block = (
        f"<p><b>Image column:</b> {html.escape(image_col)}</p>"
        if img_present
        else "<p><b>Image column:</b> None</p>"
    )
    tab_block = f"<div><b>Structured columns:</b> {len(tab_cols)}{list_or_count(tab_cols, max_show=15)}</div>"

    return f"""
<h3>Modalities & Inputs</h3>
<p>This run used <b>MultiModalPredictor</b> (images + structured features).</p>
<p><b>Label column:</b> {html.escape(label_col)}</p>
{img_block}
{tab_block}
"""

def build_model_performance_summary_table(
    train_scores: dict,
    val_scores: dict,
    test_scores: dict | None = None,
    include_test: bool = True,
    title: str | None = 'Model Performance Summary',
    show_title: bool = True,
) -> str:
    """
    Returns an HTML table for metrics, optionally hiding the Test column.
    Keys across score dicts are unioned; missing values render as '—'.
    """
    def fmt(v):
        if v is None:
            return '—'
        if isinstance(v, (int, float)):
            return f'{v:.4f}'
        return str(v)

    # Collect the union of metric keys across splits
    metrics = (
        set(train_scores.keys())
        | set(val_scores.keys())
        | (set(test_scores.keys()) if (include_test and test_scores) else set())
    )

    # Remove AG_roc_auc entirely as requested
    metrics.discard('AG_roc_auc')

    # Helper: normalize metric keys for matching preferred names
    def _norm(k: str) -> str:
        return ''.join(ch for ch in str(k).lower() if ch.isalnum())

    # Preferred metrics appear at the end, in this specific order (display names):
    preferred_display = ['Accuracy', 'ROC-AUC', 'Precision', 'Recall', 'F1-Score',
                         'PR-AUC', 'Specificity', 'MCC', 'LogLoss']
    # Mapping of normalized key -> display label
    norm_to_display = {
        'accuracy': 'Accuracy',
        'acc': 'Accuracy',
        'rocauc': 'ROC-AUC',
        'roc_auc': 'ROC-AUC',
        'rocaucscore': 'ROC-AUC',
        'precision': 'Precision',
        'prec': 'Precision',
        'recall': 'Recall',
        'recallsensitivitytpr': 'Recall',
        'f1': 'F1-Score',
        'f1score': 'F1-Score',
        'pr_auc': 'PR-AUC',
        'prauc': 'PR-AUC',
        'averageprecision': 'PR-AUC',
        'specificity': 'Specificity',
        'tnr': 'Specificity',
        'mcc': 'MCC',
        'logloss': 'LogLoss',
        'crossentropy': 'LogLoss',
    }

    # Order: non-preferred metrics alphabetically, then preferred metrics in the
    # canonical order above.
    preferred_norms = [_norm(x) for x in preferred_display]
    preferred_present = []
    others = []
    for m in sorted(metrics):
        nm = _norm(m)
        if nm in preferred_norms or any(
            p in nm for p in ["rocauc", "prauc", "f1", "mcc", "logloss",
                              "accuracy", "precision", "recall", "specificity"]
        ):
            # Defer preferred-like metrics to the end (placed in canonical order)
            preferred_present.append(m)
        else:
            others.append(m)

    final_metrics = list(others)
    claimed = set()
    for disp in preferred_display:
        # find an original key matching this display (by normalized mapping)
        target_norm = _norm(disp)
        found = None
        for m in preferred_present:
            if m in claimed:
                continue
            if _norm(m) == target_norm or norm_to_display.get(_norm(m)) == disp:
                found = m
                break
            # also allow substring matches (e.g., 'roc_auc' vs 'rocauc')
            if target_norm in _norm(m):
                found = m
                break
        if found:
            claimed.add(found)
            final_metrics.append(found)
    # Keep any preferred-like metrics that no display name claimed, so that no
    # metric is silently dropped from the table.
    final_metrics.extend(m for m in preferred_present if m not in claimed)

    metrics = final_metrics

    # All headers are sortable; the script emitted by get_html_template() hooks
    # every table carrying the performance-summary class.
    header_cells = [
        '<th class="sortable">Metric</th>',
        '<th class="sortable">Train</th>',
        '<th class="sortable">Validation</th>',
    ]
    if include_test and test_scores:
        header_cells.append('<th class="sortable">Test</th>')

    rows_html = []
    for m in metrics:
        # Display label mapping: clean up common verbose names
        nm = _norm(m)
        if nm in norm_to_display:
            disp = norm_to_display[nm]
        else:
            # generic cleanup: underscores to spaces, drop parenthetical qualifiers
            disp = str(m).replace('_', ' ')
            disp = disp.replace('(Sensitivity/TPR)', '')
            disp = disp.replace('(TNR)', '')
            disp = disp.strip()

        cells = [
            f'<td>{_escape(disp)}</td>',
            f'<td>{fmt(train_scores.get(m))}</td>',
            f'<td>{fmt(val_scores.get(m))}</td>',
        ]
        if include_test and test_scores:
            cells.append(f'<td>{fmt(test_scores.get(m))}</td>')

        rows_html.append('<tr>' + ''.join(cells) + '</tr>')

    title_html = f'<h3 style="margin-top:0">{title}</h3>' if (show_title and title) else ''

    table_html = f"""
{title_html}
<table class="performance-summary">
  <thead><tr>{''.join(header_cells)}</tr></thead>
  <tbody>{''.join(rows_html)}</tbody>
</table>
"""
    return table_html
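

# Hedged usage sketch: illustrates the ordering rules (non-preferred metrics
# first, alphabetically; canonical metrics last) with made-up scores.
def _demo_performance_table() -> str:
    train = {'accuracy': 0.97, 'roc_auc': 0.99, 'brier': 0.05}
    val = {'accuracy': 0.91, 'roc_auc': 0.94, 'brier': 0.09}
    # Row order in the rendered table: brier, Accuracy, ROC-AUC.
    return build_model_performance_summary_table(train, val, include_test=False)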