Mercurial > repos > goeckslab > tabular_learner
annotate base_model_trainer.py @ 6:4bd75b45a7a1 draft default tip
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
author | goeckslab |
---|---|
date | Fri, 01 Aug 2025 14:02:13 +0000 |
parents | 3d42f82b3c7f |
children |
rev | line source |
---|---|
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
1 import base64 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
2 import logging |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
3 import tempfile |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
4 from pathlib import Path |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
5 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
6 import h5py |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
7 import joblib |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
8 import numpy as np |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
9 import pandas as pd |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
10 from feature_help_modal import get_feature_metrics_help_modal |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
11 from feature_importance import FeatureImportanceAnalyzer |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
12 from sklearn.metrics import average_precision_score |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
13 from utils import ( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
14 add_hr_to_html, |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
15 add_plot_to_html, |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
16 build_tabbed_html, |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
17 encode_image_to_base64, |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
18 get_html_closing, |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
19 get_html_template, |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
20 ) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
21 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
22 logging.basicConfig(level=logging.DEBUG) |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
23 LOG = logging.getLogger(__name__) |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
24 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
25 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
26 class BaseModelTrainer: |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
27 def __init__( |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
28 self, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
29 input_file, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
30 target_col, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
31 output_dir, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
32 task_type, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
33 random_seed, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
34 test_file=None, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
35 **kwargs, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
36 ): |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
37 self.exp = None |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
38 self.input_file = input_file |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
39 self.target_col = target_col |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
40 self.output_dir = output_dir |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
41 self.task_type = task_type |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
42 self.random_seed = random_seed |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
43 self.data = None |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
44 self.target = None |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
45 self.best_model = None |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
46 self.results = None |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
47 self.tuning_results = None |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
48 self.features_name = None |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
49 self.plots = {} |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
50 self.explainer_plots = {} |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
51 self.plots_explainer_html = None |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
52 self.trees = [] |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
53 self.user_kwargs = kwargs.copy() |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
54 for key, value in self.user_kwargs.items(): |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
55 setattr(self, key, value) |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
56 self.setup_params = {} |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
57 self.test_file = test_file |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
58 self.test_data = None |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
59 |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
60 if not self.output_dir: |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
61 raise ValueError( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
62 "output_dir must be specified and not None" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
63 ) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
64 |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
65 # Warn about irrelevant kwargs for the task type |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
66 if self.task_type == "regression" and ( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
67 "probability_threshold" in self.user_kwargs |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
68 ): |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
69 LOG.warning( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
70 "probability_threshold is ignored for regression tasks." |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
71 ) |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
72 |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
73 LOG.info(f"Model kwargs: {self.__dict__}") |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
74 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
75 def load_data(self): |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
76 LOG.info(f"Loading data from {self.input_file}") |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
77 self.data = pd.read_csv( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
78 self.input_file, sep=None, engine="python" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
79 ) |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
80 self.data.columns = self.data.columns.str.replace(".", "_") |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
81 |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
82 names = self.data.columns.to_list() |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
83 LOG.info(f"Original dataset columns: {names}") |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
84 |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
85 target_index = int(self.target_col) - 1 |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
86 num_cols = len(names) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
87 if target_index < 0 or target_index >= num_cols: |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
88 raise ValueError( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
89 f"Target column number {self.target_col} is invalid. " |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
90 f"Please select a number between 1 and {num_cols}." |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
91 ) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
92 |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
93 self.target = names[target_index] |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
94 |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
95 # Conditional drop: only if 'prediction_label' exists and is not |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
96 # the target |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
97 if "prediction_label" in self.data.columns and ( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
98 self.data.columns[target_index] != "prediction_label" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
99 ): |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
100 LOG.info( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
101 "Dropping 'prediction_label' column as it's not the target." |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
102 ) |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
103 self.data = self.data.drop(columns=["prediction_label"]) |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
104 else: |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
105 if self.target == "prediction_label": |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
106 LOG.warning( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
107 "Using 'prediction_label' as target column. " |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
108 "This may not be intended if it's a previous prediction." |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
109 ) |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
110 |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
111 numeric_cols = self.data.select_dtypes( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
112 include=["number"] |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
113 ).columns |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
114 non_numeric_cols = self.data.select_dtypes( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
115 exclude=["number"] |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
116 ).columns |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
117 self.data[numeric_cols] = self.data[numeric_cols].apply( |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
118 pd.to_numeric, errors="coerce" |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
119 ) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
120 if len(non_numeric_cols) > 0: |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
121 LOG.info( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
122 f"Non-numeric columns found: {non_numeric_cols.tolist()}" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
123 ) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
124 |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
125 # Update names after possible drop |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
126 names = self.data.columns.to_list() |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
127 LOG.info(f"Dataset columns after processing: {names}") |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
128 |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
129 self.features_name = [n for n in names if n != self.target] |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
130 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
131 if getattr(self, "missing_value_strategy", None): |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
132 strat = self.missing_value_strategy |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
133 if strat == "mean": |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
134 self.data = self.data.fillna( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
135 self.data.mean(numeric_only=True) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
136 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
137 elif strat == "median": |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
138 self.data = self.data.fillna( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
139 self.data.median(numeric_only=True) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
140 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
141 elif strat == "drop": |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
142 self.data = self.data.dropna() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
143 else: |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
144 self.data = self.data.fillna( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
145 self.data.median(numeric_only=True) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
146 ) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
147 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
148 if self.test_file: |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
149 LOG.info(f"Loading test data from {self.test_file}") |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
150 df_test = pd.read_csv( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
151 self.test_file, sep=None, engine="python" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
152 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
153 df_test.columns = df_test.columns.str.replace(".", "_") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
154 self.test_data = df_test |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
155 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
156 def setup_pycaret(self): |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
157 LOG.info("Initializing PyCaret") |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
158 self.setup_params = { |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
159 "target": self.target, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
160 "session_id": self.random_seed, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
161 "html": True, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
162 "log_experiment": False, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
163 "system_log": False, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
164 "index": False, |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
165 } |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
166 if self.test_data is not None: |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
167 self.setup_params["test_data"] = self.test_data |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
168 for attr in [ |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
169 "train_size", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
170 "normalize", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
171 "feature_selection", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
172 "remove_outliers", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
173 "remove_multicollinearity", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
174 "polynomial_features", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
175 "feature_interaction", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
176 "feature_ratio", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
177 "fix_imbalance", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
178 ]: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
179 val = getattr(self, attr, None) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
180 if val is not None: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
181 self.setup_params[attr] = val |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
182 if getattr(self, "cross_validation_folds", None) is not None: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
183 self.setup_params["fold"] = self.cross_validation_folds |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
184 LOG.info(self.setup_params) |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
185 |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
186 if self.task_type == "classification": |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
187 from pycaret.classification import ClassificationExperiment |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
188 |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
189 self.exp = ClassificationExperiment() |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
190 elif self.task_type == "regression": |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
191 from pycaret.regression import RegressionExperiment |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
192 |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
193 self.exp = RegressionExperiment() |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
194 else: |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
195 raise ValueError( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
196 "task_type must be 'classification' or 'regression'" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
197 ) |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
198 |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
199 self.exp.setup(self.data, **self.setup_params) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
200 self.setup_params.update(self.user_kwargs) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
201 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
202 def train_model(self): |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
203 LOG.info("Training and selecting the best model") |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
204 if self.task_type == "classification": |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
205 self.exp.add_metric( |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
206 id="PR-AUC-Weighted", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
207 name="PR-AUC-Weighted", |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
208 target="pred_proba", |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
209 score_func=average_precision_score, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
210 average="weighted", |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
211 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
212 # Build arguments for compare_models() |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
213 compare_kwargs = {} |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
214 if getattr(self, "models", None): |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
215 compare_kwargs["include"] = self.models |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
216 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
217 # Respect explicit cross-validation flag |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
218 if getattr(self, "cross_validation", None) is not None: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
219 compare_kwargs["cross_validation"] = self.cross_validation |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
220 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
221 # Respect explicit fold count |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
222 if getattr(self, "cross_validation_folds", None) is not None: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
223 compare_kwargs["fold"] = self.cross_validation_folds |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
224 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
225 LOG.info(f"compare_models kwargs: {compare_kwargs}") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
226 self.best_model = self.exp.compare_models(**compare_kwargs) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
227 self.results = self.exp.pull() |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
228 if getattr(self, "tune_model", False): |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
229 LOG.info("Tuning hyperparameters of the best model") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
230 self.best_model = self.exp.tune_model(self.best_model) |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
231 self.tuning_results = self.exp.pull() |
3
f6a65e05d6ec
planemo upload for repository https://github.com/goeckslab/gleam commit b430f8b466655878c3bf63b053655fdbf039ddb0
goeckslab
parents:
2
diff
changeset
|
232 |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
233 if self.task_type == "classification": |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
234 self.results.rename(columns={"AUC": "ROC-AUC"}, inplace=True) |
5
3d42f82b3c7f
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
4
diff
changeset
|
235 |
3d42f82b3c7f
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
4
diff
changeset
|
236 prob_thresh = getattr(self, "probability_threshold", None) |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
237 if self.task_type == "classification" and ( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
238 prob_thresh is not None |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
239 ): |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
240 _ = self.exp.predict_model( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
241 self.best_model, probability_threshold=prob_thresh |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
242 ) |
5
3d42f82b3c7f
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
4
diff
changeset
|
243 else: |
3d42f82b3c7f
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
4
diff
changeset
|
244 _ = self.exp.predict_model(self.best_model) |
3d42f82b3c7f
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
4
diff
changeset
|
245 |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
246 self.test_result_df = self.exp.pull() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
247 if self.task_type == "classification": |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
248 self.test_result_df.rename( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
249 columns={"AUC": "ROC-AUC"}, inplace=True |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
250 ) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
251 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
252 def save_model(self): |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
253 hdf5_path = Path(self.output_dir) / "pycaret_model.h5" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
254 with h5py.File(hdf5_path, "w") as f: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
255 with tempfile.NamedTemporaryFile(delete=False) as tmp: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
256 joblib.dump(self.best_model, tmp.name) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
257 tmp.seek(0) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
258 model_bytes = tmp.read() |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
259 f.create_dataset("model", data=np.void(model_bytes)) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
260 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
261 def generate_plots(self): |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
262 LOG.info("Generating PyCaret diagnostic pltos") |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
263 |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
264 # choose the right plots based on task type |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
265 if self.task_type == "classification": |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
266 plot_names = [ |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
267 "learning", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
268 "vc", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
269 "calibration", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
270 "dimension", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
271 "manifold", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
272 "rfe", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
273 "threshold", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
274 "percentage_above_below", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
275 "class_report", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
276 "pr_auc", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
277 "roc_auc", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
278 ] |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
279 else: |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
280 plot_names = ["residuals", "vc", "parameter", "error", |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
281 "learning"] |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
282 for name in plot_names: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
283 try: |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
284 ax = self.exp.plot_model( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
285 self.best_model, plot=name, save=False |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
286 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
287 out_path = Path(self.output_dir) / f"plot_{name}.png" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
288 fig = ax.get_figure() |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
289 fig.savefig(out_path, bbox_inches="tight") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
290 self.plots[name] = str(out_path) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
291 except Exception as e: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
292 LOG.warning(f"Could not generate {name} plot: {e}") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
293 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
294 def encode_image_to_base64(self, img_path: str) -> str: |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
295 with open(img_path, "rb") as img_file: |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
296 return base64.b64encode(img_file.read()).decode("utf-8") |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
297 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
298 def save_html_report(self): |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
299 LOG.info("Saving HTML report") |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
300 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
301 # 1) Determine best model name |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
302 try: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
303 best_model_name = str(self.results.iloc[0]["Model"]) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
304 except Exception: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
305 best_model_name = type(self.best_model).__name__ |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
306 LOG.info(f"Best model determined as: {best_model_name}") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
307 |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
308 # 2) Compute training sample count |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
309 try: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
310 n_train = self.exp.X_train.shape[0] |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
311 except Exception: |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
312 n_train = getattr( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
313 self.exp, "X_train_transformed", pd.DataFrame() |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
314 ).shape[0] |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
315 total_rows = self.data.shape[0] |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
316 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
317 # 3) Build setup parameters table |
5
3d42f82b3c7f
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
4
diff
changeset
|
318 all_params = self.setup_params.copy() |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
319 if self.task_type == "classification" and ( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
320 hasattr(self, "probability_threshold") |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
321 ): |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
322 all_params["probability_threshold"] = ( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
323 self.probability_threshold |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
324 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
325 display_keys = [ |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
326 "Target", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
327 "Session ID", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
328 "Train Size", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
329 "Normalize", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
330 "Feature Selection", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
331 "Cross Validation", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
332 "Cross Validation Folds", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
333 "Remove Outliers", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
334 "Remove Multicollinearity", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
335 "Polynomial Features", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
336 "Fix Imbalance", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
337 "Models", |
5
3d42f82b3c7f
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
4
diff
changeset
|
338 "Probability Threshold", |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
339 ] |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
340 setup_rows = [] |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
341 for key in display_keys: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
342 pk = key.lower().replace(" ", "_") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
343 v = all_params.get(pk) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
344 if key == "Train Size": |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
345 frac = ( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
346 float(v) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
347 if v is not None |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
348 else (n_train / total_rows if total_rows else 0) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
349 ) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
350 dv = f"{frac:.2f} ({n_train} rows)" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
351 elif key in { |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
352 "Normalize", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
353 "Feature Selection", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
354 "Cross Validation", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
355 "Remove Outliers", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
356 "Remove Multicollinearity", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
357 "Polynomial Features", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
358 "Fix Imbalance", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
359 }: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
360 dv = bool(v) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
361 elif key == "Cross Validation Folds": |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
362 dv = v if v is not None else "None" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
363 elif key == "Models": |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
364 dv = ", ".join(map(str, v)) if isinstance( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
365 v, (list, tuple) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
366 ) else "None" |
5
3d42f82b3c7f
planemo upload for repository https://github.com/goeckslab/gleam commit 4a11e8a4c4e9daa884bddedfa47090476c517667
goeckslab
parents:
4
diff
changeset
|
367 elif key == "Probability Threshold": |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
368 dv = f"{v:.2f}" if v is not None else "0.5" |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
369 else: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
370 dv = v if v is not None else "None" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
371 setup_rows.append([key, dv]) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
372 if hasattr(self.exp, "_fold_metric"): |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
373 setup_rows.append(["best_model_metric", self.exp._fold_metric]) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
374 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
375 df_setup = pd.DataFrame(setup_rows, columns=["Parameter", "Value"]) |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
376 df_setup.to_csv( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
377 Path(self.output_dir) / "setup_params.csv", index=False |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
378 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
379 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
380 # 4) Persist CSVs |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
381 self.results.to_csv( |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
382 Path(self.output_dir) / "comparison_results.csv", |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
383 index=False |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
384 ) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
385 self.test_result_df.to_csv( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
386 Path(self.output_dir) / "test_results.csv", index=False |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
387 ) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
388 pd.DataFrame( |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
389 self.best_model.get_params().items(), |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
390 columns=["Parameter", "Value"] |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
391 ).to_csv(Path(self.output_dir) / "best_model.csv", index=False) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
392 |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
393 if self.tuning_results is not None: |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
394 self.tuning_results.to_csv( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
395 Path(self.output_dir) / "tuning_results.csv", |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
396 index=False |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
397 ) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
398 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
399 # 5) Header |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
400 header = f"<h2>Best Model: {best_model_name}</h2>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
401 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
402 # — Validation Summary & Configuration — |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
403 val_df = self.results.copy() |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
404 # mapping raw plot keys to user-friendly titles |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
405 plot_title_map = { |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
406 "learning": "Learning Curve", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
407 "vc": "Validation Curve", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
408 "calibration": "Calibration Curve", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
409 "dimension": "Dimensionality Reduction", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
410 "manifold": "Manifold Learning", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
411 "rfe": "Recursive Feature Elimination", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
412 "threshold": "Threshold Plot", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
413 "percentage_above_below": "Percentage Above vs. Below Cutoff", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
414 "class_report": "Classification Report", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
415 "pr_auc": "Precision-Recall AUC", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
416 "roc_auc": "Receiver Operating Characteristic AUC", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
417 "residuals": "Residuals Distribution", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
418 "error": "Prediction Error Distribution", |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
419 } |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
420 val_df.drop( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
421 columns=["TT (Ec)", "TT (Sec)"], errors="ignore", inplace=True |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
422 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
423 summary_html = ( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
424 header |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
425 + "<h2>Train & Validation Summary</h2>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
426 + '<div class="table-wrapper">' |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
427 + val_df.to_html(index=False, classes="table sortable") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
428 + "</div>" |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
429 ) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
430 |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
431 if self.tuning_results is not None: |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
432 tuning_df = self.tuning_results.copy() |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
433 tuning_df.drop( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
434 columns=["TT (Sec)"], errors="ignore", inplace=True |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
435 ) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
436 summary_html += ( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
437 f"<h2>{best_model_name}: Tuning Summary</h2>" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
438 + '<div class="table-wrapper">' |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
439 + tuning_df.to_html(index=False, classes="table sortable") |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
440 + "</div>" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
441 ) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
442 |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
443 summary_html += ( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
444 "<h2>Setup Parameters</h2>" |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
445 + '<div class="table-wrapper">' |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
446 + df_setup.to_html(index=False, classes="table sortable") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
447 + "</div>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
448 # — Hyperparameters |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
449 + "<h2>Best Model Hyperparameters</h2>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
450 + '<div class="table-wrapper">' |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
451 + pd.DataFrame( |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
452 self.best_model.get_params().items(), |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
453 columns=["Parameter", "Value"] |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
454 ).to_html(index=False, classes="table sortable") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
455 + "</div>" |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
456 ) |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
457 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
458 # choose summary plots based on task type |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
459 if self.task_type == "classification": |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
460 summary_plots = [ |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
461 "learning", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
462 "vc", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
463 "calibration", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
464 "dimension", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
465 "manifold", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
466 "rfe", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
467 "threshold", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
468 "percentage_above_below", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
469 ] |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
470 else: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
471 summary_plots = ["learning", "vc", "parameter", "residuals"] |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
472 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
473 for name in summary_plots: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
474 if name in self.plots: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
475 summary_html += "<hr>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
476 b64 = encode_image_to_base64(self.plots[name]) |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
477 title = plot_title_map.get( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
478 name, name.replace("_", " ").title() |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
479 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
480 summary_html += ( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
481 '<div class="plot">' |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
482 f"<h2>{title}</h2>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
483 f'<img src="data:image/png;base64,{b64}" ' |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
484 'style="max-width:90%;max-height:600px;' |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
485 'border:1px solid #ddd;"/>' |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
486 "</div>" |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
487 ) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
488 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
489 # — Test Summary — |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
490 test_html = ( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
491 header |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
492 + '<div class="table-wrapper">' |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
493 + self.test_result_df.to_html( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
494 index=False, classes="table sortable" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
495 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
496 + "</div>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
497 ) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
498 if self.task_type == "regression": |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
499 try: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
500 y_true = ( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
501 pd.Series(self.exp.y_test_transformed) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
502 .reset_index(drop=True) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
503 .rename("True") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
504 ) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
505 y_pred = pd.Series( |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
506 self.best_model.predict( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
507 self.exp.X_test_transformed |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
508 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
509 ).rename("Predicted") |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
510 df_tp = pd.concat([y_true, y_pred], axis=1) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
511 test_html += "<h2>True vs Predicted Values</h2>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
512 test_html += ( |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
513 '<div class="table-wrapper" ' |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
514 'style="max-height:400px; overflow-y:auto;">' |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
515 + df_tp.head(50).to_html( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
516 index=False, classes="table sortable" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
517 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
518 + "</div>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
519 + add_hr_to_html() |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
520 ) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
521 except Exception as e: |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
522 LOG.warning( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
523 f"Could not generate True vs Predicted table: {e}" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
524 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
525 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
526 # 5a) Explainer-substituted plots in order |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
527 if self.task_type == "regression": |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
528 test_order = ["residuals"] |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
529 else: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
530 test_order = [ |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
531 "confusion_matrix", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
532 "roc_auc", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
533 "pr_auc", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
534 "lift_curve", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
535 "threshold", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
536 "cumulative_precision", |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
537 ] |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
538 for key in test_order: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
539 fig_or_fn = self.explainer_plots.pop(key, None) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
540 if fig_or_fn is not None: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
541 fig = fig_or_fn() if callable(fig_or_fn) else fig_or_fn |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
542 title = plot_title_map.get( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
543 key, key.replace("_", " ").title() |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
544 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
545 test_html += ( |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
546 f"<h2>{title}</h2>" + add_plot_to_html(fig) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
547 + add_hr_to_html() |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
548 ) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
549 # 5b) Remaining PyCaret test plots |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
550 for name, path in self.plots.items(): |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
551 # classification: include only the small extras, before |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
552 # skipping anything |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
553 if self.task_type == "classification" and ( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
554 name in { |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
555 "threshold", |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
556 "pr_auc", |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
557 "class_report", |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
558 } |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
559 ): |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
560 title = plot_title_map.get( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
561 name, name.replace("_", " ").title() |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
562 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
563 b64 = encode_image_to_base64(path) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
564 test_html += ( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
565 f"<h2>{title}</h2>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
566 "<div class='plot'>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
567 f"<img src='data:image/png;base64,{b64}' " |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
568 "style='max-width:90%;max-height:600px;" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
569 "border:1px solid #ddd;'/>" |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
570 "</div>" + add_hr_to_html() |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
571 ) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
572 continue |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
573 |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
574 # regression: explicitly include the 'error' plot, |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
575 # before skipping |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
576 if self.task_type == "regression" and ( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
577 name == "error" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
578 ): |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
579 title = plot_title_map.get( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
580 "error", "Prediction Error Distribution" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
581 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
582 b64 = encode_image_to_base64(path) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
583 test_html += ( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
584 f"<h2>{title}</h2>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
585 "<div class='plot'>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
586 f"<img src='data:image/png;base64,{b64}' " |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
587 "style='max-width:90%;max-height:600px;" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
588 "border:1px solid #ddd;'/>" |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
589 "</div>" + add_hr_to_html() |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
590 ) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
591 continue |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
592 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
593 # now skip any plots already rendered via test_order |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
594 if name in test_order: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
595 continue |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
596 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
597 # — Feature Importance — |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
598 feature_html = header |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
599 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
600 # 6a) PyCaret’s default feature importances |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
601 feature_html += FeatureImportanceAnalyzer( |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
602 data=self.data, |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
603 target_col=self.target_col, |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
604 task_type=self.task_type, |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
605 output_dir=self.output_dir, |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
606 exp=self.exp, |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
607 best_model=self.best_model, |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
608 ).run() |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
609 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
610 # 6b) Explainer SHAP importances |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
611 for key in ["shap_mean", "shap_perm"]: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
612 fig_or_fn = self.explainer_plots.pop(key, None) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
613 if fig_or_fn is not None: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
614 fig = fig_or_fn() if callable(fig_or_fn) else fig_or_fn |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
615 # give SHAP plots explicit titles |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
616 title = ( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
617 "Mean Absolute SHAP Value Impact" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
618 if key == "shap_mean" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
619 else "Permutation Feature Importance" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
620 ) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
621 feature_html += ( |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
622 f"<h2>{title}</h2>" + add_plot_to_html(fig) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
623 + add_hr_to_html() |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
624 ) |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
625 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
626 # 6c) PDPs last |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
627 pdp_keys = sorted( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
628 k for k in self.explainer_plots if k.startswith("pdp__") |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
629 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
630 for k in pdp_keys: |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
631 fig_or_fn = self.explainer_plots[k] |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
632 fig = fig_or_fn() if callable(fig_or_fn) else fig_or_fn |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
633 # extract feature name |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
634 feature = k.split("__", 1)[1] |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
635 title = f"Partial Dependence for {feature}" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
636 feature_html += ( |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
637 f"<h2>{title}</h2>" + add_plot_to_html(fig) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
638 + add_hr_to_html() |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
639 ) |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
640 # 7) Assemble final HTML (three tabs) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
641 html = get_html_template() |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
642 html += "<h1>Tabular Learner Model Report</h1>" |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
643 html += build_tabbed_html(summary_html, test_html, feature_html) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
644 html += get_feature_metrics_help_modal() |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
645 html += get_html_closing() |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
646 |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
647 # 8) Write out |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
648 (Path(self.output_dir) / "comparison_result.html").write_text( |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
649 html, encoding="utf-8" |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
650 ) |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
651 LOG.info( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
652 f"HTML report generated at: " |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
653 f"{self.output_dir}/comparison_result.html" |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
654 ) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
655 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
656 def save_dashboard(self): |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
657 raise NotImplementedError("Subclasses should implement this method") |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
658 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
659 def generate_plots_explainer(self): |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
660 raise NotImplementedError("Subclasses should implement this method") |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
661 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
662 def generate_tree_plots(self): |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
663 from sklearn.ensemble import ( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
664 RandomForestClassifier, RandomForestRegressor |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
665 ) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
666 from xgboost import XGBClassifier, XGBRegressor |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
667 from explainerdashboard.explainers import RandomForestExplainer |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
668 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
669 LOG.info("Generating tree plots") |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
670 X_test = self.exp.X_test_transformed.copy() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
671 y_test = self.exp.y_test_transformed |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
672 |
6
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
673 if isinstance( |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
674 self.best_model, (RandomForestClassifier, RandomForestRegressor) |
4bd75b45a7a1
planemo upload for repository https://github.com/goeckslab/gleam commit 47a5977e074223e92e216efa42969a4056516707
goeckslab
parents:
5
diff
changeset
|
675 ): |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
676 n_trees = self.best_model.n_estimators |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
677 elif isinstance(self.best_model, (XGBClassifier, XGBRegressor)): |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
678 n_trees = len(self.best_model.get_booster().get_dump()) |
2
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
679 else: |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
680 LOG.warning("Tree plots not supported for this model type.") |
77c88226bfde
planemo upload for repository https://github.com/goeckslab/gleam commit 06c0da44ac93256dfb616a6b40276b5485a71e8e
goeckslab
parents:
0
diff
changeset
|
681 return |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
682 |
4
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
683 explainer = RandomForestExplainer(self.best_model, X_test, y_test) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
684 for i in range(n_trees): |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
685 fig = explainer.decisiontree_encoded(tree_idx=i, index=0) |
11fdac5affb3
planemo upload for repository https://github.com/goeckslab/gleam commit 8112548ac44b7a4769093d76c722c8fcdeaaef54
goeckslab
parents:
3
diff
changeset
|
686 self.trees.append(fig) |
0
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
687 |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
688 def run(self): |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
689 self.load_data() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
690 self.setup_pycaret() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
691 self.train_model() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
692 self.save_model() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
693 self.generate_plots() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
694 self.generate_plots_explainer() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
695 self.generate_tree_plots() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
696 self.save_html_report() |
209b663a4f62
planemo upload for repository https://github.com/goeckslab/gleam commit 5dd048419fcbd285a327f88267e93996cd279ee6
goeckslab
parents:
diff
changeset
|
697 # self.save_dashboard() |