Mercurial > repos > goeckslab > image_learner
comparison utils.py @ 9:9e912fce264c draft default tip
planemo upload for repository https://github.com/goeckslab/gleam.git commit eace0d7c2b2939029c052991d238a54947d2e191
| author | goeckslab |
|---|---|
| date | Wed, 27 Aug 2025 21:02:48 +0000 |
| parents | 85e6f4b2ad18 |
| children | |
comparison
equal
deleted
inserted
replaced
| 8:85e6f4b2ad18 | 9:9e912fce264c |
|---|---|
| 6 return """ | 6 return """ |
| 7 <html> | 7 <html> |
| 8 <head> | 8 <head> |
| 9 <meta charset="UTF-8"> | 9 <meta charset="UTF-8"> |
| 10 <title>Galaxy-Ludwig Report</title> | 10 <title>Galaxy-Ludwig Report</title> |
| 11 | |
| 12 <!-- your existing styles --> | |
| 13 <style> | 11 <style> |
| 14 body { | 12 body { |
| 15 font-family: Arial, sans-serif; | 13 font-family: Arial, sans-serif; |
| 16 margin: 0; | 14 margin: 0; |
| 17 padding: 20px; | 15 padding: 20px; |
| 326 def get_metrics_help_modal() -> str: | 324 def get_metrics_help_modal() -> str: |
| 327 modal_html = ( | 325 modal_html = ( |
| 328 '<div id="metricsHelpModal" class="modal">' | 326 '<div id="metricsHelpModal" class="modal">' |
| 329 ' <div class="modal-content">' | 327 ' <div class="modal-content">' |
| 330 ' <span class="close">×</span>' | 328 ' <span class="close">×</span>' |
| 331 ' <h2>Model Evaluation Metrics — Help Guide</h2>' | 329 " <h2>Model Evaluation Metrics — Help Guide</h2>" |
| 332 ' <div class="metrics-guide">' | 330 ' <div class="metrics-guide">' |
| 333 ' <h3>1) General Metrics (Regression and Classification)</h3>' | 331 " <h3>1) General Metrics (Regression and Classification)</h3>" |
| 334 ' <p><strong>Loss (Regression & Classification):</strong> ' | 332 " <p><strong>Loss (Regression & Classification):</strong> " |
| 335 'Measures the difference between predicted and actual values, ' | 333 "Measures the difference between predicted and actual values, " |
| 336 'optimized during training. Lower is better. ' | 334 "optimized during training. Lower is better. " |
| 337 'For regression, this is often Mean Squared Error (MSE) or ' | 335 "For regression, this is often Mean Squared Error (MSE) or " |
| 338 'Mean Absolute Error (MAE). For classification, it’s typically ' | 336 "Mean Absolute Error (MAE). For classification, it’s typically " |
| 339 'cross-entropy or log loss.</p>' | 337 "cross-entropy or log loss.</p>" |
| 340 ' <h3>2) Regression Metrics</h3>' | 338 " <h3>2) Regression Metrics</h3>" |
| 341 ' <p><strong>Mean Absolute Error (MAE):</strong> ' | 339 " <p><strong>Mean Absolute Error (MAE):</strong> " |
| 342 'Average of absolute differences between predicted and actual values, ' | 340 "Average of absolute differences between predicted and actual values, " |
| 343 'in the same units as the target. Use for interpretable error measurement ' | 341 "in the same units as the target. Use for interpretable error measurement " |
| 344 'when all errors are equally important. Less sensitive to outliers than MSE.</p>' | 342 "when all errors are equally important. Less sensitive to outliers than MSE.</p>" |
| 345 ' <p><strong>Mean Squared Error (MSE):</strong> ' | 343 " <p><strong>Mean Squared Error (MSE):</strong> " |
| 346 'Average of squared differences between predicted and actual values. ' | 344 "Average of squared differences between predicted and actual values. " |
| 347 'Penalizes larger errors more heavily, useful when large deviations are critical. ' | 345 "Penalizes larger errors more heavily, useful when large deviations are critical. " |
| 348 'Often used as the loss function in regression.</p>' | 346 "Often used as the loss function in regression.</p>" |
| 349 ' <p><strong>Root Mean Squared Error (RMSE):</strong> ' | 347 " <p><strong>Root Mean Squared Error (RMSE):</strong> " |
| 350 'Square root of MSE, in the same units as the target. ' | 348 "Square root of MSE, in the same units as the target. " |
| 351 'Balances interpretability and sensitivity to large errors. ' | 349 "Balances interpretability and sensitivity to large errors. " |
| 352 'Widely used for regression evaluation.</p>' | 350 "Widely used for regression evaluation.</p>" |
| 353 ' <p><strong>Mean Absolute Percentage Error (MAPE):</strong> ' | 351 " <p><strong>Mean Absolute Percentage Error (MAPE):</strong> " |
| 354 'Average absolute error as a percentage of actual values. ' | 352 "Average absolute error as a percentage of actual values. " |
| 355 'Scale-independent, ideal for comparing relative errors across datasets. ' | 353 "Scale-independent, ideal for comparing relative errors across datasets. " |
| 356 'Avoid when actual values are near zero.</p>' | 354 "Avoid when actual values are near zero.</p>" |
| 357 ' <p><strong>Root Mean Squared Percentage Error (RMSPE):</strong> ' | 355 " <p><strong>Root Mean Squared Percentage Error (RMSPE):</strong> " |
| 358 'Square root of mean squared percentage error. Scale-independent, ' | 356 "Square root of mean squared percentage error. Scale-independent, " |
| 359 'penalizes larger relative errors more than MAPE. Use for forecasting ' | 357 "penalizes larger relative errors more than MAPE. Use for forecasting " |
| 360 'or when relative accuracy matters.</p>' | 358 "or when relative accuracy matters.</p>" |
| 361 ' <p><strong>R² Score:</strong> Proportion of variance in the target ' | 359 " <p><strong>R² Score:</strong> Proportion of variance in the target " |
| 362 'explained by the model. Ranges from negative infinity to 1 (perfect prediction). ' | 360 "explained by the model. Ranges from negative infinity to 1 (perfect prediction). " |
| 363 'Use to assess model fit; negative values indicate poor performance ' | 361 "Use to assess model fit; negative values indicate poor performance " |
| 364 'compared to predicting the mean.</p>' | 362 "compared to predicting the mean.</p>" |
| 365 ' <h3>3) Classification Metrics</h3>' | 363 " <h3>3) Classification Metrics</h3>" |
| 366 ' <p><strong>Accuracy:</strong> Proportion of correct predictions ' | 364 " <p><strong>Accuracy:</strong> Proportion of correct predictions " |
| 367 'among all predictions. Simple but misleading for imbalanced datasets, ' | 365 "among all predictions. Simple but misleading for imbalanced datasets, " |
| 368 'where high accuracy may hide poor performance on minority classes.</p>' | 366 "where high accuracy may hide poor performance on minority classes.</p>" |
| 369 ' <p><strong>Micro Accuracy:</strong> Sums true positives and true negatives ' | 367 " <p><strong>Micro Accuracy:</strong> Sums true positives and true negatives " |
| 370 'across all classes before computing accuracy. Suitable for multiclass or ' | 368 "across all classes before computing accuracy. Suitable for multiclass or " |
| 371 'multilabel problems with imbalanced data.</p>' | 369 "multilabel problems with imbalanced data.</p>" |
| 372 ' <p><strong>Token Accuracy:</strong> Measures how often predicted tokens ' | 370 " <p><strong>Token Accuracy:</strong> Measures how often predicted tokens " |
| 373 '(e.g., in sequences) match true tokens. Common in NLP tasks like text generation ' | 371 "(e.g., in sequences) match true tokens. Common in NLP tasks like text generation " |
| 374 'or token classification.</p>' | 372 "or token classification.</p>" |
| 375 ' <p><strong>Precision:</strong> Proportion of positive predictions that are ' | 373 " <p><strong>Precision:</strong> Proportion of positive predictions that are " |
| 376 'correct (TP / (TP + FP)). Use when false positives are costly, e.g., spam detection.</p>' | 374 "correct (TP / (TP + FP)). Use when false positives are costly, e.g., spam detection.</p>" |
| 377 ' <p><strong>Recall (Sensitivity):</strong> Proportion of actual positives ' | 375 " <p><strong>Recall (Sensitivity):</strong> Proportion of actual positives " |
| 378 'correctly predicted (TP / (TP + FN)). Use when missing positives is risky, ' | 376 "correctly predicted (TP / (TP + FN)). Use when missing positives is risky, " |
| 379 'e.g., disease detection.</p>' | 377 "e.g., disease detection.</p>" |
| 380 ' <p><strong>Specificity:</strong> True negative rate (TN / (TN + FP)). ' | 378 " <p><strong>Specificity:</strong> True negative rate (TN / (TN + FP)). " |
| 381 'Measures ability to identify negatives. Useful in medical testing to avoid ' | 379 "Measures ability to identify negatives. Useful in medical testing to avoid " |
| 382 'false alarms.</p>' | 380 "false alarms.</p>" |
| 383 ' <h3>4) Classification: Macro, Micro, and Weighted Averages</h3>' | 381 " <h3>4) Classification: Macro, Micro, and Weighted Averages</h3>" |
| 384 ' <p><strong>Macro Precision / Recall / F1:</strong> Averages the metric ' | 382 " <p><strong>Macro Precision / Recall / F1:</strong> Averages the metric " |
| 385 'across all classes, treating each equally. Best for balanced datasets where ' | 383 "across all classes, treating each equally. Best for balanced datasets where " |
| 386 'all classes are equally important.</p>' | 384 "all classes are equally important.</p>" |
| 387 ' <p><strong>Micro Precision / Recall / F1:</strong> Aggregates true positives, ' | 385 " <p><strong>Micro Precision / Recall / F1:</strong> Aggregates true positives, " |
| 388 'false positives, and false negatives across all classes before computing. ' | 386 "false positives, and false negatives across all classes before computing. " |
| 389 'Ideal for imbalanced or multilabel classification.</p>' | 387 "Ideal for imbalanced or multilabel classification.</p>" |
| 390 ' <p><strong>Weighted Precision / Recall / F1:</strong> Averages metrics ' | 388 " <p><strong>Weighted Precision / Recall / F1:</strong> Averages metrics " |
| 391 'across classes, weighted by the number of true instances per class. Balances ' | 389 "across classes, weighted by the number of true instances per class. Balances " |
| 392 'class importance based on frequency.</p>' | 390 "class importance based on frequency.</p>" |
| 393 ' <h3>5) Classification: Average Precision (PR-AUC Variants)</h3>' | 391 " <h3>5) Classification: Average Precision (PR-AUC Variants)</h3>" |
| 394 ' <p><strong>Average Precision Macro:</strong> Precision-Recall AUC averaged ' | 392 " <p><strong>Average Precision Macro:</strong> Precision-Recall AUC averaged " |
| 395 'equally across classes. Use for balanced multiclass problems.</p>' | 393 "equally across classes. Use for balanced multiclass problems.</p>" |
| 396 ' <p><strong>Average Precision Micro:</strong> Global Precision-Recall AUC ' | 394 " <p><strong>Average Precision Micro:</strong> Global Precision-Recall AUC " |
| 397 'using all instances. Best for imbalanced or multilabel classification.</p>' | 395 "using all instances. Best for imbalanced or multilabel classification.</p>" |
| 398 ' <p><strong>Average Precision Samples:</strong> Precision-Recall AUC averaged ' | 396 " <p><strong>Average Precision Samples:</strong> Precision-Recall AUC averaged " |
| 399 'across individual samples. Ideal for multilabel tasks where samples have multiple ' | 397 "across individual samples. Ideal for multilabel tasks where samples have multiple " |
| 400 'labels.</p>' | 398 "labels.</p>" |
| 401 ' <h3>6) Classification: ROC-AUC Variants</h3>' | 399 " <h3>6) Classification: ROC-AUC Variants</h3>" |
| 402 ' <p><strong>ROC-AUC:</strong> Measures ability to distinguish between classes. ' | 400 " <p><strong>ROC-AUC:</strong> Measures ability to distinguish between classes. " |
| 403 'AUC = 1 is perfect; 0.5 is random guessing. Use for binary classification.</p>' | 401 "AUC = 1 is perfect; 0.5 is random guessing. Use for binary classification.</p>" |
| 404 ' <p><strong>Macro ROC-AUC:</strong> Averages AUC across all classes equally. ' | 402 " <p><strong>Macro ROC-AUC:</strong> Averages AUC across all classes equally. " |
| 405 'Suitable for balanced multiclass problems.</p>' | 403 "Suitable for balanced multiclass problems.</p>" |
| 406 ' <p><strong>Micro ROC-AUC:</strong> Computes AUC from aggregated predictions ' | 404 " <p><strong>Micro ROC-AUC:</strong> Computes AUC from aggregated predictions " |
| 407 'across all classes. Useful for imbalanced or multilabel settings.</p>' | 405 "across all classes. Useful for imbalanced or multilabel settings.</p>" |
| 408 ' <h3>7) Classification: Confusion Matrix Stats (Per Class)</h3>' | 406 " <h3>7) Classification: Confusion Matrix Stats (Per Class)</h3>" |
| 409 ' <p><strong>True Positives / Negatives (TP / TN):</strong> Correct predictions ' | 407 " <p><strong>True Positives / Negatives (TP / TN):</strong> Correct predictions " |
| 410 'for positives and negatives, respectively.</p>' | 408 "for positives and negatives, respectively.</p>" |
| 411 ' <p><strong>False Positives / Negatives (FP / FN):</strong> Incorrect predictions ' | 409 " <p><strong>False Positives / Negatives (FP / FN):</strong> Incorrect predictions " |
| 412 '— false alarms and missed detections.</p>' | 410 "— false alarms and missed detections.</p>" |
| 413 ' <h3>8) Classification: Ranking Metrics</h3>' | 411 " <h3>8) Classification: Ranking Metrics</h3>" |
| 414 ' <p><strong>Hits at K:</strong> Measures whether the true label is among the ' | 412 " <p><strong>Hits at K:</strong> Measures whether the true label is among the " |
| 415 'top-K predictions. Common in recommendation systems and retrieval tasks.</p>' | 413 "top-K predictions. Common in recommendation systems and retrieval tasks.</p>" |
| 416 ' <h3>9) Other Metrics (Classification)</h3>' | 414 " <h3>9) Other Metrics (Classification)</h3>" |
| 417 ' <p><strong>Cohen\'s Kappa:</strong> Measures agreement between predicted and ' | 415 " <p><strong>Cohen's Kappa:</strong> Measures agreement between predicted and " |
| 418 'actual labels, adjusted for chance. Useful for multiclass classification with ' | 416 "actual labels, adjusted for chance. Useful for multiclass classification with " |
| 419 'imbalanced data.</p>' | 417 "imbalanced data.</p>" |
| 420 ' <p><strong>Matthews Correlation Coefficient (MCC):</strong> Balanced measure ' | 418 " <p><strong>Matthews Correlation Coefficient (MCC):</strong> Balanced measure " |
| 421 'using TP, TN, FP, and FN. Effective for imbalanced datasets.</p>' | 419 "using TP, TN, FP, and FN. Effective for imbalanced datasets.</p>" |
| 422 ' <h3>10) Metric Recommendations</h3>' | 420 " <h3>10) Metric Recommendations</h3>" |
| 423 ' <ul>' | 421 " <ul>" |
| 424 ' <li><strong>Regression:</strong> Use <strong>RMSE</strong> or ' | 422 " <li><strong>Regression:</strong> Use <strong>RMSE</strong> or " |
| 425 '<strong>MAE</strong> for general evaluation, <strong>MAPE</strong> for relative ' | 423 "<strong>MAE</strong> for general evaluation, <strong>MAPE</strong> for relative " |
| 426 'errors, and <strong>R²</strong> to assess model fit. Use <strong>MSE</strong> or ' | 424 "errors, and <strong>R²</strong> to assess model fit. Use <strong>MSE</strong> or " |
| 427 '<strong>RMSPE</strong> when large errors are critical.</li>' | 425 "<strong>RMSPE</strong> when large errors are critical.</li>" |
| 428 ' <li><strong>Classification (Balanced Data):</strong> Use <strong>Accuracy</strong> ' | 426 " <li><strong>Classification (Balanced Data):</strong> Use <strong>Accuracy</strong> " |
| 429 'and <strong>F1</strong> for overall performance.</li>' | 427 "and <strong>F1</strong> for overall performance.</li>" |
| 430 ' <li><strong>Classification (Imbalanced Data):</strong> Use <strong>Precision</strong>, ' | 428 " <li><strong>Classification (Imbalanced Data):</strong> Use <strong>Precision</strong>, " |
| 431 '<strong>Recall</strong>, and <strong>ROC-AUC</strong> to focus on minority class ' | 429 "<strong>Recall</strong>, and <strong>ROC-AUC</strong> to focus on minority class " |
| 432 'performance.</li>' | 430 "performance.</li>" |
| 433 ' <li><strong>Multilabel or Imbalanced Classification:</strong> Use ' | 431 " <li><strong>Multilabel or Imbalanced Classification:</strong> Use " |
| 434 '<strong>Micro Precision/Recall/F1</strong> or <strong>Micro ROC-AUC</strong>.</li>' | 432 "<strong>Micro Precision/Recall/F1</strong> or <strong>Micro ROC-AUC</strong>.</li>" |
| 435 ' <li><strong>Balanced Multiclass:</strong> Use <strong>Macro Precision/Recall/F1</strong> ' | 433 " <li><strong>Balanced Multiclass:</strong> Use <strong>Macro Precision/Recall/F1</strong> " |
| 436 'or <strong>Macro ROC-AUC</strong>.</li>' | 434 "or <strong>Macro ROC-AUC</strong>.</li>" |
| 437 ' <li><strong>Class Frequency Matters:</strong> Use <strong>Weighted Precision/Recall/F1</strong> ' | 435 " <li><strong>Class Frequency Matters:</strong> Use <strong>Weighted Precision/Recall/F1</strong> " |
| 438 'to account for class imbalance.</li>' | 436 "to account for class imbalance.</li>" |
| 439 ' <li><strong>Recommendation/Ranking:</strong> Use <strong>Hits at K</strong> for retrieval tasks.</li>' | 437 " <li><strong>Recommendation/Ranking:</strong> Use <strong>Hits at K</strong> for retrieval tasks.</li>" |
| 440 ' <li><strong>Detailed Analysis:</strong> Use <strong>Confusion Matrix stats</strong> ' | 438 " <li><strong>Detailed Analysis:</strong> Use <strong>Confusion Matrix stats</strong> " |
| 441 'for class-wise performance in classification.</li>' | 439 "for class-wise performance in classification.</li>" |
| 442 ' </ul>' | 440 " </ul>" |
| 443 ' </div>' | 441 " </div>" |
| 444 ' </div>' | 442 " </div>" |
| 445 '</div>' | 443 "</div>" |
| 446 ) | 444 ) |
| 447 modal_css = ( | 445 modal_css = ( |
| 448 "<style>" | 446 "<style>" |
| 449 ".modal {" | 447 ".modal {" |
| 450 " display: none;" | 448 " display: none;" |
| 495 ' var modal = document.getElementById("metricsHelpModal");' | 493 ' var modal = document.getElementById("metricsHelpModal");' |
| 496 ' var openBtn = document.getElementById("openMetricsHelp");' | 494 ' var openBtn = document.getElementById("openMetricsHelp");' |
| 497 ' var span = document.getElementsByClassName("close")[0];' | 495 ' var span = document.getElementsByClassName("close")[0];' |
| 498 " if (openBtn && modal) {" | 496 " if (openBtn && modal) {" |
| 499 " openBtn.onclick = function() {" | 497 " openBtn.onclick = function() {" |
| 500 " modal.style.display = \"block\";" | 498 ' modal.style.display = "block";' |
| 501 " };" | 499 " };" |
| 502 " }" | 500 " }" |
| 503 " if (span && modal) {" | 501 " if (span && modal) {" |
| 504 " span.onclick = function() {" | 502 " span.onclick = function() {" |
| 505 " modal.style.display = \"none\";" | 503 ' modal.style.display = "none";' |
| 506 " };" | 504 " };" |
| 507 " }" | 505 " }" |
| 508 " window.onclick = function(event) {" | 506 " window.onclick = function(event) {" |
| 509 " if (event.target == modal) {" | 507 " if (event.target == modal) {" |
| 510 " modal.style.display = \"none\";" | 508 ' modal.style.display = "none";' |
| 511 " }" | 509 " }" |
| 512 " }" | 510 " }" |
| 513 "});" | 511 "});" |
| 514 "</script>" | 512 "</script>" |
| 515 ) | 513 ) |
