annotate base_model_trainer.py @ 8:1aed7d47c5ec draft default tip

planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
author goeckslab
date Fri, 25 Jul 2025 19:02:32 +0000
parents f4cb41f458fd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
1 import base64
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
2 import logging
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
3 import tempfile
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
4 from pathlib import Path
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
5
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
6 import h5py
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
7 import joblib
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
8 import numpy as np
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
9 import pandas as pd
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
10 from feature_help_modal import get_feature_metrics_help_modal
3
ccd798db5abb planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
goeckslab
parents: 2
diff changeset
11 from feature_importance import FeatureImportanceAnalyzer
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
12 from sklearn.metrics import average_precision_score
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
13 from utils import (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
14 add_hr_to_html,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
15 add_plot_to_html,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
16 build_tabbed_html,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
17 encode_image_to_base64,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
18 get_html_closing,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
19 get_html_template,
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
20 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
21
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
22 logging.basicConfig(level=logging.DEBUG)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
23 LOG = logging.getLogger(__name__)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
24
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
25
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
26 class BaseModelTrainer:
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
27 def __init__(
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
28 self,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
29 input_file,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
30 target_col,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
31 output_dir,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
32 task_type,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
33 random_seed,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
34 test_file=None,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
35 **kwargs,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
36 ):
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
37 self.exp = None
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
38 self.input_file = input_file
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
39 self.target_col = target_col
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
40 self.output_dir = output_dir
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
41 self.task_type = task_type
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
42 self.random_seed = random_seed
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
43 self.data = None
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
44 self.target = None
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
45 self.best_model = None
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
46 self.results = None
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
47 self.features_name = None
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
48 self.plots = {}
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
49 self.explainer_plots = {}
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
50 self.plots_explainer_html = None
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
51 self.trees = []
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
52 self.user_kwargs = kwargs.copy()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
53 for key, value in self.user_kwargs.items():
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
54 setattr(self, key, value)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
55 self.setup_params = {}
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
56 self.test_file = test_file
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
57 self.test_data = None
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
58
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
59 if not self.output_dir:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
60 raise ValueError("output_dir must be specified and not None")
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
61
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
62 LOG.info(f"Model kwargs: {self.__dict__}")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
63
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
64 def load_data(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
65 LOG.info(f"Loading data from {self.input_file}")
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
66 self.data = pd.read_csv(self.input_file, sep=None, engine="python")
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
67 self.data.columns = self.data.columns.str.replace(".", "_")
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
68 if "prediction_label" in self.data.columns:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
69 self.data = self.data.drop(columns=["prediction_label"])
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
70
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
71 numeric_cols = self.data.select_dtypes(include=["number"]).columns
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
72 non_numeric_cols = self.data.select_dtypes(exclude=["number"]).columns
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
73 self.data[numeric_cols] = self.data[numeric_cols].apply(
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
74 pd.to_numeric, errors="coerce"
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
75 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
76 if len(non_numeric_cols) > 0:
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
77 LOG.info(f"Non-numeric columns found: {non_numeric_cols.tolist()}")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
78
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
79 names = self.data.columns.to_list()
3
ccd798db5abb planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
goeckslab
parents: 2
diff changeset
80 target_index = int(self.target_col) - 1
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
81 self.target = names[target_index]
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
82 self.features_name = [n for i, n in enumerate(names) if i != target_index]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
83
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
84 if getattr(self, "missing_value_strategy", None):
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
85 strat = self.missing_value_strategy
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
86 if strat == "mean":
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
87 self.data = self.data.fillna(self.data.mean(numeric_only=True))
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
88 elif strat == "median":
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
89 self.data = self.data.fillna(self.data.median(numeric_only=True))
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
90 elif strat == "drop":
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
91 self.data = self.data.dropna()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
92 else:
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
93 self.data = self.data.fillna(self.data.median(numeric_only=True))
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
94
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
95 if self.test_file:
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
96 LOG.info(f"Loading test data from {self.test_file}")
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
97 df_test = pd.read_csv(self.test_file, sep=None, engine="python")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
98 df_test.columns = df_test.columns.str.replace(".", "_")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
99 self.test_data = df_test
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
100
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
101 def setup_pycaret(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
102 LOG.info("Initializing PyCaret")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
103 self.setup_params = {
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
104 "target": self.target,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
105 "session_id": self.random_seed,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
106 "html": True,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
107 "log_experiment": False,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
108 "system_log": False,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
109 "index": False,
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
110 }
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
111 if self.test_data is not None:
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
112 self.setup_params["test_data"] = self.test_data
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
113 for attr in [
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
114 "train_size",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
115 "normalize",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
116 "feature_selection",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
117 "remove_outliers",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
118 "remove_multicollinearity",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
119 "polynomial_features",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
120 "feature_interaction",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
121 "feature_ratio",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
122 "fix_imbalance",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
123 ]:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
124 val = getattr(self, attr, None)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
125 if val is not None:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
126 self.setup_params[attr] = val
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
127 if getattr(self, "cross_validation_folds", None) is not None:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
128 self.setup_params["fold"] = self.cross_validation_folds
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
129 LOG.info(self.setup_params)
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
130
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
131 if self.task_type == "classification":
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
132 from pycaret.classification import ClassificationExperiment
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
133
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
134 self.exp = ClassificationExperiment()
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
135 elif self.task_type == "regression":
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
136 from pycaret.regression import RegressionExperiment
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
137
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
138 self.exp = RegressionExperiment()
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
139 else:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
140 raise ValueError("task_type must be 'classification' or 'regression'")
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
141
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
142 self.exp.setup(self.data, **self.setup_params)
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
143 self.setup_params.update(self.user_kwargs)
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
144
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
145 def train_model(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
146 LOG.info("Training and selecting the best model")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
147 if self.task_type == "classification":
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
148 self.exp.add_metric(
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
149 id="PR-AUC-Weighted",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
150 name="PR-AUC-Weighted",
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
151 target="pred_proba",
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
152 score_func=average_precision_score,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
153 average="weighted",
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
154 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
155 # Build arguments for compare_models()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
156 compare_kwargs = {}
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
157 if getattr(self, "models", None):
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
158 compare_kwargs["include"] = self.models
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
159
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
160 # Respect explicit cross-validation flag
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
161 if getattr(self, "cross_validation", None) is not None:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
162 compare_kwargs["cross_validation"] = self.cross_validation
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
163
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
164 # Respect explicit fold count
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
165 if getattr(self, "cross_validation_folds", None) is not None:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
166 compare_kwargs["fold"] = self.cross_validation_folds
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
167
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
168 LOG.info(f"compare_models kwargs: {compare_kwargs}")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
169 self.best_model = self.exp.compare_models(**compare_kwargs)
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
170 self.results = self.exp.pull()
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
171 if getattr(self, "tune_model", False):
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
172 LOG.info("Tuning hyperparameters of the best model")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
173 self.best_model = self.exp.tune_model(self.best_model)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
174 self.results = self.exp.pull()
7
f4cb41f458fd planemo upload for repository https://github.com/goeckslab/gleam commit b430f8b466655878c3bf63b053655fdbf039ddb0
goeckslab
parents: 6
diff changeset
175
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
176 if self.task_type == "classification":
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
177 self.results.rename(columns={"AUC": "ROC-AUC"}, inplace=True)
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
178 _ = self.exp.predict_model(self.best_model)
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
179 self.test_result_df = self.exp.pull()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
180 if self.task_type == "classification":
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
181 self.test_result_df.rename(columns={"AUC": "ROC-AUC"}, inplace=True)
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
182
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
183 def save_model(self):
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
184 hdf5_path = Path(self.output_dir) / "pycaret_model.h5"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
185 with h5py.File(hdf5_path, "w") as f:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
186 with tempfile.NamedTemporaryFile(delete=False) as tmp:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
187 joblib.dump(self.best_model, tmp.name)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
188 tmp.seek(0)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
189 model_bytes = tmp.read()
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
190 f.create_dataset("model", data=np.void(model_bytes))
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
191
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
192 def generate_plots(self):
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
193 LOG.info("Generating PyCaret diagnostic pltos")
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
194
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
195 # choose the right plots based on task
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
196 if self.task_type == "classification":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
197 plot_names = [
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
198 "learning",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
199 "vc",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
200 "calibration",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
201 "dimension",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
202 "manifold",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
203 "rfe",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
204 "threshold",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
205 "percentage_above_below",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
206 "class_report",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
207 "pr_auc",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
208 "roc_auc",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
209 ]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
210 else:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
211 plot_names = ["residuals", "vc", "parameter", "error", "learning"]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
212 for name in plot_names:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
213 try:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
214 ax = self.exp.plot_model(self.best_model, plot=name, save=False)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
215 out_path = Path(self.output_dir) / f"plot_{name}.png"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
216 fig = ax.get_figure()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
217 fig.savefig(out_path, bbox_inches="tight")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
218 self.plots[name] = str(out_path)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
219 except Exception as e:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
220 LOG.warning(f"Could not generate {name} plot: {e}")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
221
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
222 def encode_image_to_base64(self, img_path: str) -> str:
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
223 with open(img_path, "rb") as img_file:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
224 return base64.b64encode(img_file.read()).decode("utf-8")
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
225
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
226 def save_html_report(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
227 LOG.info("Saving HTML report")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
228
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
229 # 1) Determine best model name
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
230 try:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
231 best_model_name = str(self.results.iloc[0]["Model"])
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
232 except Exception:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
233 best_model_name = type(self.best_model).__name__
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
234 LOG.info(f"Best model determined as: {best_model_name}")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
235
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
236 # 2) Compute training sample count
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
237 try:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
238 n_train = self.exp.X_train.shape[0]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
239 except Exception:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
240 n_train = getattr(self.exp, "X_train_transformed", pd.DataFrame()).shape[0]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
241 total_rows = self.data.shape[0]
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
242
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
243 # 3) Build setup parameters table
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
244 all_params = self.setup_params
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
245 display_keys = [
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
246 "Target",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
247 "Session ID",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
248 "Train Size",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
249 "Normalize",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
250 "Feature Selection",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
251 "Cross Validation",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
252 "Cross Validation Folds",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
253 "Remove Outliers",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
254 "Remove Multicollinearity",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
255 "Polynomial Features",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
256 "Fix Imbalance",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
257 "Models",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
258 ]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
259 setup_rows = []
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
260 for key in display_keys:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
261 pk = key.lower().replace(" ", "_")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
262 v = all_params.get(pk)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
263 if key == "Train Size":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
264 frac = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
265 float(v)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
266 if v is not None
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
267 else (n_train / total_rows if total_rows else 0)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
268 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
269 dv = f"{frac:.2f} ({n_train} rows)"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
270 elif key in {
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
271 "Normalize",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
272 "Feature Selection",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
273 "Cross Validation",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
274 "Remove Outliers",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
275 "Remove Multicollinearity",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
276 "Polynomial Features",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
277 "Fix Imbalance",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
278 }:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
279 dv = bool(v)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
280 elif key == "Cross Validation Folds":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
281 dv = v if v is not None else "None"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
282 elif key == "Models":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
283 dv = ", ".join(map(str, v)) if isinstance(v, (list, tuple)) else "None"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
284 else:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
285 dv = v if v is not None else "None"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
286 setup_rows.append([key, dv])
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
287 if hasattr(self.exp, "_fold_metric"):
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
288 setup_rows.append(["best_model_metric", self.exp._fold_metric])
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
289
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
290 df_setup = pd.DataFrame(setup_rows, columns=["Parameter", "Value"])
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
291 df_setup.to_csv(Path(self.output_dir) / "setup_params.csv", index=False)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
292
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
293 # 4) Persist CSVs
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
294 self.results.to_csv(
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
295 Path(self.output_dir) / "comparison_results.csv", index=False
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
296 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
297 self.test_result_df.to_csv(
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
298 Path(self.output_dir) / "test_results.csv", index=False
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
299 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
300 pd.DataFrame(
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
301 self.best_model.get_params().items(), columns=["Parameter", "Value"]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
302 ).to_csv(Path(self.output_dir) / "best_model.csv", index=False)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
303
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
304 # 5) Header
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
305 header = f"<h2>Best Model: {best_model_name}</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
306
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
307 # — Validation Summary & Configuration —
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
308 val_df = self.results.copy()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
309 # mapping raw plot keys to user-friendly titles
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
310 plot_title_map = {
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
311 "learning": "Learning Curve",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
312 "vc": "Validation Curve",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
313 "calibration": "Calibration Curve",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
314 "dimension": "Dimensionality Reduction",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
315 "manifold": "Manifold Learning",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
316 "rfe": "Recursive Feature Elimination",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
317 "threshold": "Threshold Plot",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
318 "percentage_above_below": "Percentage Above vs. Below Cutoff",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
319 "class_report": "Classification Report",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
320 "pr_auc": "Precision-Recall AUC",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
321 "roc_auc": "Receiver Operating Characteristic AUC",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
322 "residuals": "Residuals Distribution",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
323 "error": "Prediction Error Distribution",
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
324 }
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
325 val_df.drop(columns=["TT (Ec)", "TT (Sec)"], errors="ignore", inplace=True)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
326 summary_html = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
327 header
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
328 + "<h2>Train & Validation Summary</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
329 + '<div class="table-wrapper">'
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
330 + val_df.to_html(index=False, classes="table sortable")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
331 + "</div>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
332 + "<h2>Setup Parameters</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
333 + '<div class="table-wrapper">'
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
334 + df_setup.to_html(index=False, classes="table sortable")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
335 + "</div>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
336 # — Hyperparameters
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
337 + "<h2>Best Model Hyperparameters</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
338 + '<div class="table-wrapper">'
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
339 + pd.DataFrame(
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
340 self.best_model.get_params().items(), columns=["Parameter", "Value"]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
341 ).to_html(index=False, classes="table sortable")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
342 + "</div>"
3
ccd798db5abb planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
goeckslab
parents: 2
diff changeset
343 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
344
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
345 # choose summary plots based on task type
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
346 if self.task_type == "classification":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
347 summary_plots = [
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
348 "learning",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
349 "vc",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
350 "calibration",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
351 "dimension",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
352 "manifold",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
353 "rfe",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
354 "threshold",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
355 "percentage_above_below",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
356 ]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
357 else:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
358 summary_plots = ["learning", "vc", "parameter", "residuals"]
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
359
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
360 for name in summary_plots:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
361 if name in self.plots:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
362 summary_html += "<hr>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
363 b64 = encode_image_to_base64(self.plots[name])
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
364 title = plot_title_map.get(name, name.replace("_", " ").title())
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
365 summary_html += (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
366 '<div class="plot">'
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
367 f"<h2>{title}</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
368 f'<img src="data:image/png;base64,{b64}" '
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
369 'style="max-width:90%;max-height:600px;border:1px solid #ddd;"/>'
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
370 "</div>"
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
371 )
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
372
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
373 # — Test Summary —
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
374 test_html = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
375 header
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
376 + '<div class="table-wrapper">'
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
377 + self.test_result_df.to_html(index=False, classes="table sortable")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
378 + "</div>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
379 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
380 if self.task_type == "regression":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
381 try:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
382 y_true = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
383 pd.Series(self.exp.y_test_transformed)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
384 .reset_index(drop=True)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
385 .rename("True")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
386 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
387 y_pred = pd.Series(
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
388 self.best_model.predict(self.exp.X_test_transformed)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
389 ).rename("Predicted")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
390 df_tp = pd.concat([y_true, y_pred], axis=1)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
391 test_html += "<h2>True vs Predicted Values</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
392 test_html += (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
393 '<div class="table-wrapper" style="max-height:400px; overflow-y:auto;">'
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
394 + df_tp.head(50).to_html(index=False, classes="table sortable")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
395 + "</div>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
396 + add_hr_to_html()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
397 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
398 except Exception as e:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
399 LOG.warning(f"Could not generate True vs Predicted table: {e}")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
400
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
401 # 5a) Explainer-substituted plots in order
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
402 if self.task_type == "regression":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
403 test_order = ["residuals"]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
404 else:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
405 test_order = [
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
406 "confusion_matrix",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
407 "roc_auc",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
408 "pr_auc",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
409 "lift_curve",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
410 "threshold",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
411 "cumulative_precision",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
412 ]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
413 for key in test_order:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
414 fig_or_fn = self.explainer_plots.pop(key, None)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
415 if fig_or_fn is not None:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
416 fig = fig_or_fn() if callable(fig_or_fn) else fig_or_fn
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
417 title = plot_title_map.get(key, key.replace("_", " ").title())
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
418 test_html += (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
419 f"<h2>{title}</h2>" + add_plot_to_html(fig) + add_hr_to_html()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
420 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
421 # 5b) Remaining PyCaret test plots
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
422 for name, path in self.plots.items():
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
423 # classification: include only the small extras, before skipping anything
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
424 if self.task_type == "classification" and name in {
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
425 "threshold",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
426 "pr_auc",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
427 "class_report",
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
428 }:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
429 title = plot_title_map.get(name, name.replace("_", " ").title())
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
430 b64 = encode_image_to_base64(path)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
431 test_html += (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
432 f"<h2>{title}</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
433 "<div class='plot'>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
434 f"<img src='data:image/png;base64,{b64}' "
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
435 "style='max-width:90%;max-height:600px;border:1px solid #ddd;'/>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
436 "</div>" + add_hr_to_html()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
437 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
438 continue
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
439
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
440 # regression: explicitly include the 'error' plot, before skipping
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
441 if self.task_type == "regression" and name == "error":
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
442 title = plot_title_map.get("error", "Prediction Error Distribution")
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
443 b64 = encode_image_to_base64(path)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
444 test_html += (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
445 f"<h2>{title}</h2>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
446 "<div class='plot'>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
447 f"<img src='data:image/png;base64,{b64}' "
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
448 "style='max-width:90%;max-height:600px;border:1px solid #ddd;'/>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
449 "</div>" + add_hr_to_html()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
450 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
451 continue
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
452
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
453 # now skip any plots already rendered via test_order
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
454 if name in test_order:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
455 continue
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
456
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
457 # — Feature Importance —
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
458 feature_html = header
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
459
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
460 # 6a) PyCaret’s default feature importances
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
461 feature_html += FeatureImportanceAnalyzer(
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
462 data=self.data,
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
463 target_col=self.target_col,
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
464 task_type=self.task_type,
3
ccd798db5abb planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
goeckslab
parents: 2
diff changeset
465 output_dir=self.output_dir,
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
466 exp=self.exp,
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
467 best_model=self.best_model,
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
468 ).run()
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
469
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
470 # 6b) Explainer SHAP importances
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
471 for key in ["shap_mean", "shap_perm"]:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
472 fig_or_fn = self.explainer_plots.pop(key, None)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
473 if fig_or_fn is not None:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
474 fig = fig_or_fn() if callable(fig_or_fn) else fig_or_fn
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
475 # give SHAP plots explicit titles
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
476 title = (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
477 "Mean Absolute SHAP Value Impact"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
478 if key == "shap_mean"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
479 else "Permutation Feature Importance"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
480 )
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
481 feature_html += (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
482 f"<h2>{title}</h2>" + add_plot_to_html(fig) + add_hr_to_html()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
483 )
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
484
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
485 # 6c) PDPs last
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
486 pdp_keys = sorted(k for k in self.explainer_plots if k.startswith("pdp__"))
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
487 for k in pdp_keys:
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
488 fig_or_fn = self.explainer_plots[k]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
489 fig = fig_or_fn() if callable(fig_or_fn) else fig_or_fn
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
490 # extract feature name
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
491 feature = k.split("__", 1)[1]
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
492 title = f"Partial Dependence for {feature}"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
493 feature_html += (
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
494 f"<h2>{title}</h2>" + add_plot_to_html(fig) + add_hr_to_html()
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
495 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
496 # 7) Assemble final HTML (three tabs)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
497 html = get_html_template()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
498 html += "<h1>Tabular Learner Model Report</h1>"
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
499 html += build_tabbed_html(summary_html, test_html, feature_html)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
500 html += get_feature_metrics_help_modal()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
501 html += get_html_closing()
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
502
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
503 # 8) Write out
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
504 (Path(self.output_dir) / "comparison_result.html").write_text(
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
505 html, encoding="utf-8"
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
506 )
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
507 LOG.info(f"HTML report generated at: {self.output_dir}/comparison_result.html")
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
508
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
509 def save_dashboard(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
510 raise NotImplementedError("Subclasses should implement this method")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
511
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
512 def generate_plots_explainer(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
513 raise NotImplementedError("Subclasses should implement this method")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
514
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
515 def generate_tree_plots(self):
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
516 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
517 from xgboost import XGBClassifier, XGBRegressor
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
518 from explainerdashboard.explainers import RandomForestExplainer
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
519
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
520 LOG.info("Generating tree plots")
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
521 X_test = self.exp.X_test_transformed.copy()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
522 y_test = self.exp.y_test_transformed
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
523
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
524 if isinstance(self.best_model, (RandomForestClassifier, RandomForestRegressor)):
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
525 n_trees = self.best_model.n_estimators
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
526 elif isinstance(self.best_model, (XGBClassifier, XGBRegressor)):
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
527 n_trees = len(self.best_model.get_booster().get_dump())
6
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
528 else:
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
529 LOG.warning("Tree plots not supported for this model type.")
a32ff7201629 planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents: 3
diff changeset
530 return
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
531
8
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
532 explainer = RandomForestExplainer(self.best_model, X_test, y_test)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
533 for i in range(n_trees):
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
534 fig = explainer.decisiontree_encoded(tree_idx=i, index=0)
1aed7d47c5ec planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents: 7
diff changeset
535 self.trees.append(fig)
0
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
536
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
537 def run(self):
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
538 self.load_data()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
539 self.setup_pycaret()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
540 self.train_model()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
541 self.save_model()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
542 self.generate_plots()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
543 self.generate_plots_explainer()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
544 self.generate_tree_plots()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
545 self.save_html_report()
1f20fe57fdee planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
goeckslab
parents:
diff changeset
546 # self.save_dashboard()