annotate base_model_trainer.py @ 6:4bd75b45a7a1 draft default tip

planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
author goeckslab
date Fri, 01 Aug 2025 14:02:13 +0000
parents 3d42f82b3c7f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
1 import base64
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
2 import logging
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
3 import tempfile
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
4 from pathlib import Path
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
5
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
6 import h5py
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
7 import joblib
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
8 import numpy as np
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
9 import pandas as pd
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
10 from feature_help_modal import get_feature_metrics_help_modal
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
11 from feature_importance import FeatureImportanceAnalyzer
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
12 from sklearn.metrics import average_precision_score
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
13 from utils import (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
14 add_hr_to_html,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
15 add_plot_to_html,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
16 build_tabbed_html,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
17 encode_image_to_base64,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
18 get_html_closing,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
19 get_html_template,
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
20 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
21
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
22 logging.basicConfig(level=logging.DEBUG)
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
23 LOG = logging.getLogger(__name__)
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
24
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
25
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
26 class BaseModelTrainer:
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
27 def __init__(
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
28 self,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
29 input_file,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
30 target_col,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
31 output_dir,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
32 task_type,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
33 random_seed,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
34 test_file=None,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
35 **kwargs,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
36 ):
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
37 self.exp = None
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
38 self.input_file = input_file
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
39 self.target_col = target_col
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
40 self.output_dir = output_dir
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
41 self.task_type = task_type
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
42 self.random_seed = random_seed
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
43 self.data = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
44 self.target = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
45 self.best_model = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
46 self.results = None
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
47 self.tuning_results = None
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
48 self.features_name = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
49 self.plots = {}
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
50 self.explainer_plots = {}
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
51 self.plots_explainer_html = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
52 self.trees = []
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
53 self.user_kwargs = kwargs.copy()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
54 for key, value in self.user_kwargs.items():
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
55 setattr(self, key, value)
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
56 self.setup_params = {}
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
57 self.test_file = test_file
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
58 self.test_data = None
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
59
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
60 if not self.output_dir:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
61 raise ValueError(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
62 "output_dir must be specified and not None"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
63 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
64
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
65 # Warn about irrelevant kwargs for the task type
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
66 if self.task_type == "regression" and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
67 "probability_threshold" in self.user_kwargs
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
68 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
69 LOG.warning(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
70 "probability_threshold is ignored for regression tasks."
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
71 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
72
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
73 LOG.info(f"Model kwargs: {self.__dict__}")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
74
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
75 def load_data(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
76 LOG.info(f"Loading data from {self.input_file}")
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
77 self.data = pd.read_csv(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
78 self.input_file, sep=None, engine="python"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
79 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
80 self.data.columns = self.data.columns.str.replace(".", "_")
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
81
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
82 names = self.data.columns.to_list()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
83 LOG.info(f"Original dataset columns: {names}")
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
84
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
85 target_index = int(self.target_col) - 1
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
86 num_cols = len(names)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
87 if target_index < 0 or target_index >= num_cols:
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
88 raise ValueError(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
89 f"Target column number {self.target_col} is invalid. "
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
90 f"Please select a number between 1 and {num_cols}."
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
91 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
92
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
93 self.target = names[target_index]
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
94
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
95 # Conditional drop: only if 'prediction_label' exists and is not
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
96 # the target
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
97 if "prediction_label" in self.data.columns and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
98 self.data.columns[target_index] != "prediction_label"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
99 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
100 LOG.info(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
101 "Dropping 'prediction_label' column as it's not the target."
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
102 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
103 self.data = self.data.drop(columns=["prediction_label"])
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
104 else:
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
105 if self.target == "prediction_label":
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
106 LOG.warning(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
107 "Using 'prediction_label' as target column. "
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
108 "This may not be intended if it's a previous prediction."
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
109 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
110
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
111 numeric_cols = self.data.select_dtypes(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
112 include=["number"]
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
113 ).columns
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
114 non_numeric_cols = self.data.select_dtypes(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
115 exclude=["number"]
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
116 ).columns
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
117 self.data[numeric_cols] = self.data[numeric_cols].apply(
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
118 pd.to_numeric, errors="coerce"
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
119 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
120 if len(non_numeric_cols) > 0:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
121 LOG.info(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
122 f"Non-numeric columns found: {non_numeric_cols.tolist()}"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
123 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
124
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
125 # Update names after possible drop
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
126 names = self.data.columns.to_list()
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
127 LOG.info(f"Dataset columns after processing: {names}")
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
128
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
129 self.features_name = [n for n in names if n != self.target]
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
130
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
131 if getattr(self, "missing_value_strategy", None):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
132 strat = self.missing_value_strategy
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
133 if strat == "mean":
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
134 self.data = self.data.fillna(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
135 self.data.mean(numeric_only=True)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
136 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
137 elif strat == "median":
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
138 self.data = self.data.fillna(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
139 self.data.median(numeric_only=True)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
140 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
141 elif strat == "drop":
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
142 self.data = self.data.dropna()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
143 else:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
144 self.data = self.data.fillna(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
145 self.data.median(numeric_only=True)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
146 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
147
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
148 if self.test_file:
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
149 LOG.info(f"Loading test data from {self.test_file}")
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
150 df_test = pd.read_csv(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
151 self.test_file, sep=None, engine="python"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
152 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
153 df_test.columns = df_test.columns.str.replace(".", "_")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
154 self.test_data = df_test
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
155
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
156 def setup_pycaret(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
157 LOG.info("Initializing PyCaret")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
158 self.setup_params = {
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
159 "target": self.target,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
160 "session_id": self.random_seed,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
161 "html": True,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
162 "log_experiment": False,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
163 "system_log": False,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
164 "index": False,
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
165 }
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
166 if self.test_data is not None:
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
167 self.setup_params["test_data"] = self.test_data
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
168 for attr in [
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
169 "train_size",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
170 "normalize",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
171 "feature_selection",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
172 "remove_outliers",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
173 "remove_multicollinearity",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
174 "polynomial_features",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
175 "feature_interaction",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
176 "feature_ratio",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
177 "fix_imbalance",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
178 ]:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
179 val = getattr(self, attr, None)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
180 if val is not None:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
181 self.setup_params[attr] = val
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
182 if getattr(self, "cross_validation_folds", None) is not None:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
183 self.setup_params["fold"] = self.cross_validation_folds
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
184 LOG.info(self.setup_params)
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
185
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
186 if self.task_type == "classification":
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
187 from pycaret.classification import ClassificationExperiment
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
188
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
189 self.exp = ClassificationExperiment()
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
190 elif self.task_type == "regression":
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
191 from pycaret.regression import RegressionExperiment
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
192
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
193 self.exp = RegressionExperiment()
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
194 else:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
195 raise ValueError(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
196 "task_type must be 'classification' or 'regression'"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
197 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
198
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
199 self.exp.setup(self.data, **self.setup_params)
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
200 self.setup_params.update(self.user_kwargs)
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
201
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
202 def train_model(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
203 LOG.info("Training and selecting the best model")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
204 if self.task_type == "classification":
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
205 self.exp.add_metric(
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
206 id="PR-AUC-Weighted",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
207 name="PR-AUC-Weighted",
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
208 target="pred_proba",
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
209 score_func=average_precision_score,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
210 average="weighted",
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
211 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
212 # Build arguments for compare_models()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
213 compare_kwargs = {}
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
214 if getattr(self, "models", None):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
215 compare_kwargs["include"] = self.models
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
216
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
217 # Respect explicit cross-validation flag
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
218 if getattr(self, "cross_validation", None) is not None:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
219 compare_kwargs["cross_validation"] = self.cross_validation
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
220
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
221 # Respect explicit fold count
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
222 if getattr(self, "cross_validation_folds", None) is not None:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
223 compare_kwargs["fold"] = self.cross_validation_folds
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
224
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
225 LOG.info(f"compare_models kwargs: {compare_kwargs}")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
226 self.best_model = self.exp.compare_models(**compare_kwargs)
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
227 self.results = self.exp.pull()
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
228 if getattr(self, "tune_model", False):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
229 LOG.info("Tuning hyperparameters of the best model")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
230 self.best_model = self.exp.tune_model(self.best_model)
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
231 self.tuning_results = self.exp.pull()
3
f6a65e05d6ec planemo upload for repository https://github.com/goeckslab/gleam commit b430f8b466655878c3bf63b053655fdbf039ddb0
goeckslab
parents: 2
diff changeset
232
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
233 if self.task_type == "classification":
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
234 self.results.rename(columns={"AUC": "ROC-AUC"}, inplace=True)
5
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
235
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
236 prob_thresh = getattr(self, "probability_threshold", None)
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
237 if self.task_type == "classification" and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
238 prob_thresh is not None
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
239 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
240 _ = self.exp.predict_model(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
241 self.best_model, probability_threshold=prob_thresh
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
242 )
5
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
243 else:
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
244 _ = self.exp.predict_model(self.best_model)
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
245
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
246 self.test_result_df = self.exp.pull()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
247 if self.task_type == "classification":
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
248 self.test_result_df.rename(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
249 columns={"AUC": "ROC-AUC"}, inplace=True
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
250 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
251
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
252 def save_model(self):
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
253 hdf5_path = Path(self.output_dir) / "pycaret_model.h5"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
254 with h5py.File(hdf5_path, "w") as f:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
255 with tempfile.NamedTemporaryFile(delete=False) as tmp:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
256 joblib.dump(self.best_model, tmp.name)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
257 tmp.seek(0)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
258 model_bytes = tmp.read()
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
259 f.create_dataset("model", data=np.void(model_bytes))
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
260
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
261 def generate_plots(self):
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
262 LOG.info("Generating PyCaret diagnostic pltos")
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
263
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
264 # choose the right plots based on task type
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
265 if self.task_type == "classification":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
266 plot_names = [
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
267 "learning",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
268 "vc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
269 "calibration",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
270 "dimension",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
271 "manifold",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
272 "rfe",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
273 "threshold",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
274 "percentage_above_below",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
275 "class_report",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
276 "pr_auc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
277 "roc_auc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
278 ]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
279 else:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
280 plot_names = ["residuals", "vc", "parameter", "error",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
281 "learning"]
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
282 for name in plot_names:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
283 try:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
284 ax = self.exp.plot_model(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
285 self.best_model, plot=name, save=False
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
286 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
287 out_path = Path(self.output_dir) / f"plot_{name}.png"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
288 fig = ax.get_figure()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
289 fig.savefig(out_path, bbox_inches="tight")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
290 self.plots[name] = str(out_path)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
291 except Exception as e:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
292 LOG.warning(f"Could not generate {name} plot: {e}")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
293
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
294 def encode_image_to_base64(self, img_path: str) -> str:
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
295 with open(img_path, "rb") as img_file:
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
296 return base64.b64encode(img_file.read()).decode("utf-8")
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
297
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
298 def save_html_report(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
299 LOG.info("Saving HTML report")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
300
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
301 # 1) Determine best model name
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
302 try:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
303 best_model_name = str(self.results.iloc[0]["Model"])
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
304 except Exception:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
305 best_model_name = type(self.best_model).__name__
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
306 LOG.info(f"Best model determined as: {best_model_name}")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
307
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
308 # 2) Compute training sample count
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
309 try:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
310 n_train = self.exp.X_train.shape[0]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
311 except Exception:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
312 n_train = getattr(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
313 self.exp, "X_train_transformed", pd.DataFrame()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
314 ).shape[0]
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
315 total_rows = self.data.shape[0]
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
316
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
317 # 3) Build setup parameters table
5
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
318 all_params = self.setup_params.copy()
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
319 if self.task_type == "classification" and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
320 hasattr(self, "probability_threshold")
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
321 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
322 all_params["probability_threshold"] = (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
323 self.probability_threshold
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
324 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
325 display_keys = [
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
326 "Target",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
327 "Session ID",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
328 "Train Size",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
329 "Normalize",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
330 "Feature Selection",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
331 "Cross Validation",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
332 "Cross Validation Folds",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
333 "Remove Outliers",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
334 "Remove Multicollinearity",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
335 "Polynomial Features",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
336 "Fix Imbalance",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
337 "Models",
5
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
338 "Probability Threshold",
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
339 ]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
340 setup_rows = []
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
341 for key in display_keys:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
342 pk = key.lower().replace(" ", "_")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
343 v = all_params.get(pk)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
344 if key == "Train Size":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
345 frac = (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
346 float(v)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
347 if v is not None
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
348 else (n_train / total_rows if total_rows else 0)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
349 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
350 dv = f"{frac:.2f} ({n_train} rows)"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
351 elif key in {
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
352 "Normalize",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
353 "Feature Selection",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
354 "Cross Validation",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
355 "Remove Outliers",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
356 "Remove Multicollinearity",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
357 "Polynomial Features",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
358 "Fix Imbalance",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
359 }:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
360 dv = bool(v)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
361 elif key == "Cross Validation Folds":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
362 dv = v if v is not None else "None"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
363 elif key == "Models":
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
364 dv = ", ".join(map(str, v)) if isinstance(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
365 v, (list, tuple)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
366 ) else "None"
5
3d42f82b3c7f planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents: 4
diff changeset
367 elif key == "Probability Threshold":
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
368 dv = f"{v:.2f}" if v is not None else "0.5"
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
369 else:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
370 dv = v if v is not None else "None"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
371 setup_rows.append([key, dv])
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
372 if hasattr(self.exp, "_fold_metric"):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
373 setup_rows.append(["best_model_metric", self.exp._fold_metric])
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
374
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
375 df_setup = pd.DataFrame(setup_rows, columns=["Parameter", "Value"])
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
376 df_setup.to_csv(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
377 Path(self.output_dir) / "setup_params.csv", index=False
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
378 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
379
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
380 # 4) Persist CSVs
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
381 self.results.to_csv(
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
382 Path(self.output_dir) / "comparison_results.csv",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
383 index=False
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
384 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
385 self.test_result_df.to_csv(
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
386 Path(self.output_dir) / "test_results.csv", index=False
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
387 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
388 pd.DataFrame(
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
389 self.best_model.get_params().items(),
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
390 columns=["Parameter", "Value"]
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
391 ).to_csv(Path(self.output_dir) / "best_model.csv", index=False)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
392
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
393 if self.tuning_results is not None:
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
394 self.tuning_results.to_csv(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
395 Path(self.output_dir) / "tuning_results.csv",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
396 index=False
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
397 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
398
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
399 # 5) Header
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
400 header = f"<h2>Best Model: {best_model_name}</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
401
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
402 # — Validation Summary & Configuration —
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
403 val_df = self.results.copy()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
404 # mapping raw plot keys to user-friendly titles
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
405 plot_title_map = {
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
406 "learning": "Learning Curve",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
407 "vc": "Validation Curve",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
408 "calibration": "Calibration Curve",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
409 "dimension": "Dimensionality Reduction",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
410 "manifold": "Manifold Learning",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
411 "rfe": "Recursive Feature Elimination",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
412 "threshold": "Threshold Plot",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
413 "percentage_above_below": "Percentage Above vs. Below Cutoff",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
414 "class_report": "Classification Report",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
415 "pr_auc": "Precision-Recall AUC",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
416 "roc_auc": "Receiver Operating Characteristic AUC",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
417 "residuals": "Residuals Distribution",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
418 "error": "Prediction Error Distribution",
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
419 }
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
420 val_df.drop(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
421 columns=["TT (Ec)", "TT (Sec)"], errors="ignore", inplace=True
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
422 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
423 summary_html = (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
424 header
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
425 + "<h2>Train & Validation Summary</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
426 + '<div class="table-wrapper">'
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
427 + val_df.to_html(index=False, classes="table sortable")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
428 + "</div>"
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
429 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
430
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
431 if self.tuning_results is not None:
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
432 tuning_df = self.tuning_results.copy()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
433 tuning_df.drop(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
434 columns=["TT (Sec)"], errors="ignore", inplace=True
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
435 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
436 summary_html += (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
437 f"<h2>{best_model_name}: Tuning Summary</h2>"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
438 + '<div class="table-wrapper">'
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
439 + tuning_df.to_html(index=False, classes="table sortable")
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
440 + "</div>"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
441 )
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
442
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
443 summary_html += (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
444 "<h2>Setup Parameters</h2>"
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
445 + '<div class="table-wrapper">'
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
446 + df_setup.to_html(index=False, classes="table sortable")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
447 + "</div>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
448 # — Hyperparameters
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
449 + "<h2>Best Model Hyperparameters</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
450 + '<div class="table-wrapper">'
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
451 + pd.DataFrame(
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
452 self.best_model.get_params().items(),
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
453 columns=["Parameter", "Value"]
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
454 ).to_html(index=False, classes="table sortable")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
455 + "</div>"
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
456 )
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
457
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
458 # choose summary plots based on task type
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
459 if self.task_type == "classification":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
460 summary_plots = [
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
461 "learning",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
462 "vc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
463 "calibration",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
464 "dimension",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
465 "manifold",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
466 "rfe",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
467 "threshold",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
468 "percentage_above_below",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
469 ]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
470 else:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
471 summary_plots = ["learning", "vc", "parameter", "residuals"]
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
472
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
473 for name in summary_plots:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
474 if name in self.plots:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
475 summary_html += "<hr>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
476 b64 = encode_image_to_base64(self.plots[name])
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
477 title = plot_title_map.get(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
478 name, name.replace("_", " ").title()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
479 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
480 summary_html += (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
481 '<div class="plot">'
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
482 f"<h2>{title}</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
483 f'<img src="data:image/png;base64,{b64}" '
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
484 'style="max-width:90%;max-height:600px;'
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
485 'border:1px solid #ddd;"/>'
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
486 "</div>"
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
487 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
488
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
489 # — Test Summary —
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
490 test_html = (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
491 header
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
492 + '<div class="table-wrapper">'
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
493 + self.test_result_df.to_html(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
494 index=False, classes="table sortable"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
495 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
496 + "</div>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
497 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
498 if self.task_type == "regression":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
499 try:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
500 y_true = (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
501 pd.Series(self.exp.y_test_transformed)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
502 .reset_index(drop=True)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
503 .rename("True")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
504 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
505 y_pred = pd.Series(
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
506 self.best_model.predict(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
507 self.exp.X_test_transformed
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
508 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
509 ).rename("Predicted")
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
510 df_tp = pd.concat([y_true, y_pred], axis=1)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
511 test_html += "<h2>True vs Predicted Values</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
512 test_html += (
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
513 '<div class="table-wrapper" '
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
514 'style="max-height:400px; overflow-y:auto;">'
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
515 + df_tp.head(50).to_html(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
516 index=False, classes="table sortable"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
517 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
518 + "</div>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
519 + add_hr_to_html()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
520 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
521 except Exception as e:
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
522 LOG.warning(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
523 f"Could not generate True vs Predicted table: {e}"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
524 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
525
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
526 # 5a) Explainer-substituted plots in order
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
527 if self.task_type == "regression":
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
528 test_order = ["residuals"]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
529 else:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
530 test_order = [
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
531 "confusion_matrix",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
532 "roc_auc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
533 "pr_auc",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
534 "lift_curve",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
535 "threshold",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
536 "cumulative_precision",
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
537 ]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
538 for key in test_order:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
539 fig_or_fn = self.explainer_plots.pop(key, None)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
540 if fig_or_fn is not None:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
541 fig = fig_or_fn() if callable(fig_or_fn) else fig_or_fn
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
542 title = plot_title_map.get(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
543 key, key.replace("_", " ").title()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
544 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
545 test_html += (
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
546 f"<h2>{title}</h2>" + add_plot_to_html(fig)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
547 + add_hr_to_html()
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
548 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
549 # 5b) Remaining PyCaret test plots
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
550 for name, path in self.plots.items():
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
551 # classification: include only the small extras, before
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
552 # skipping anything
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
553 if self.task_type == "classification" and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
554 name in {
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
555 "threshold",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
556 "pr_auc",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
557 "class_report",
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
558 }
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
559 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
560 title = plot_title_map.get(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
561 name, name.replace("_", " ").title()
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
562 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
563 b64 = encode_image_to_base64(path)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
564 test_html += (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
565 f"<h2>{title}</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
566 "<div class='plot'>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
567 f"<img src='data:image/png;base64,{b64}' "
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
568 "style='max-width:90%;max-height:600px;"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
569 "border:1px solid #ddd;'/>"
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
570 "</div>" + add_hr_to_html()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
571 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
572 continue
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
573
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
574 # regression: explicitly include the 'error' plot,
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
575 # before skipping
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
576 if self.task_type == "regression" and (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
577 name == "error"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
578 ):
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
579 title = plot_title_map.get(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
580 "error", "Prediction Error Distribution"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
581 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
582 b64 = encode_image_to_base64(path)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
583 test_html += (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
584 f"<h2>{title}</h2>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
585 "<div class='plot'>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
586 f"<img src='data:image/png;base64,{b64}' "
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
587 "style='max-width:90%;max-height:600px;"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
588 "border:1px solid #ddd;'/>"
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
589 "</div>" + add_hr_to_html()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
590 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
591 continue
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
592
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
593 # now skip any plots already rendered via test_order
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
594 if name in test_order:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
595 continue
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
596
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
597 # — Feature Importance —
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
598 feature_html = header
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
599
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
600 # 6a) PyCaret’s default feature importances
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
601 feature_html += FeatureImportanceAnalyzer(
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
602 data=self.data,
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
603 target_col=self.target_col,
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
604 task_type=self.task_type,
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
605 output_dir=self.output_dir,
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
606 exp=self.exp,
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
607 best_model=self.best_model,
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
608 ).run()
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
609
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
610 # 6b) Explainer SHAP importances
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
611 for key in ["shap_mean", "shap_perm"]:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
612 fig_or_fn = self.explainer_plots.pop(key, None)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
613 if fig_or_fn is not None:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
614 fig = fig_or_fn() if callable(fig_or_fn) else fig_or_fn
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
615 # give SHAP plots explicit titles
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
616 title = (
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
617 "Mean Absolute SHAP Value Impact"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
618 if key == "shap_mean"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
619 else "Permutation Feature Importance"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
620 )
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
621 feature_html += (
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
622 f"<h2>{title}</h2>" + add_plot_to_html(fig)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
623 + add_hr_to_html()
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
624 )
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
625
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
626 # 6c) PDPs last
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
627 pdp_keys = sorted(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
628 k for k in self.explainer_plots if k.startswith("pdp__")
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
629 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
630 for k in pdp_keys:
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
631 fig_or_fn = self.explainer_plots[k]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
632 fig = fig_or_fn() if callable(fig_or_fn) else fig_or_fn
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
633 # extract feature name
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
634 feature = k.split("__", 1)[1]
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
635 title = f"Partial Dependence for {feature}"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
636 feature_html += (
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
637 f"<h2>{title}</h2>" + add_plot_to_html(fig)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
638 + add_hr_to_html()
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
639 )
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
640 # 7) Assemble final HTML (three tabs)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
641 html = get_html_template()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
642 html += "<h1>Tabular Learner Model Report</h1>"
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
643 html += build_tabbed_html(summary_html, test_html, feature_html)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
644 html += get_feature_metrics_help_modal()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
645 html += get_html_closing()
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
646
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
647 # 8) Write out
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
648 (Path(self.output_dir) / "comparison_result.html").write_text(
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
649 html, encoding="utf-8"
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
650 )
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
651 LOG.info(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
652 f"HTML report generated at: "
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
653 f"{self.output_dir}/comparison_result.html"
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
654 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
655
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
656 def save_dashboard(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
657 raise NotImplementedError("Subclasses should implement this method")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
658
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
659 def generate_plots_explainer(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
660 raise NotImplementedError("Subclasses should implement this method")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
661
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
662 def generate_tree_plots(self):
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
663 from sklearn.ensemble import (
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
664 RandomForestClassifier, RandomForestRegressor
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
665 )
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
666 from xgboost import XGBClassifier, XGBRegressor
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
667 from explainerdashboard.explainers import RandomForestExplainer
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
668
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
669 LOG.info("Generating tree plots")
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
670 X_test = self.exp.X_test_transformed.copy()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
671 y_test = self.exp.y_test_transformed
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
672
6
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
673 if isinstance(
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
674 self.best_model, (RandomForestClassifier, RandomForestRegressor)
4bd75b45a7a1 planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents: 5
diff changeset
675 ):
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
676 n_trees = self.best_model.n_estimators
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
677 elif isinstance(self.best_model, (XGBClassifier, XGBRegressor)):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
678 n_trees = len(self.best_model.get_booster().get_dump())
2
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
679 else:
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
680 LOG.warning("Tree plots not supported for this model type.")
77c88226bfde planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 0
diff changeset
681 return
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
682
4
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
683 explainer = RandomForestExplainer(self.best_model, X_test, y_test)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
684 for i in range(n_trees):
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
685 fig = explainer.decisiontree_encoded(tree_idx=i, index=0)
11fdac5affb3 planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 3
diff changeset
686 self.trees.append(fig)
0
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
687
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
688 def run(self):
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
689 self.load_data()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
690 self.setup_pycaret()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
691 self.train_model()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
692 self.save_model()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
693 self.generate_plots()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
694 self.generate_plots_explainer()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
695 self.generate_tree_plots()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
696 self.save_html_report()
209b663a4f62 planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff changeset
697 # self.save_dashboard()