# HG changeset patch
# User goeckslab
# Date 1733893183 0
# Node ID 1f20fe57fdeeae143f730e3f3ca7db3a68e0bb2c
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
diff -r 000000000000 -r 1f20fe57fdee base_model_trainer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/base_model_trainer.py Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,359 @@
+import base64
+import logging
+import os
+import tempfile
+
+from feature_importance import FeatureImportanceAnalyzer
+
+import h5py
+
+import joblib
+
+import numpy as np
+
+import pandas as pd
+
+from sklearn.metrics import average_precision_score
+
+from utils import get_html_closing, get_html_template
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+class BaseModelTrainer:
+
+ def __init__(
+ self,
+ input_file,
+ target_col,
+ output_dir,
+ task_type,
+ random_seed,
+ test_file=None,
+ **kwargs
+ ):
+ self.exp = None # This will be set in the subclass
+ self.input_file = input_file
+ self.target_col = target_col
+ self.output_dir = output_dir
+ self.task_type = task_type
+ self.random_seed = random_seed
+ self.data = None
+ self.target = None
+ self.best_model = None
+ self.results = None
+ self.features_name = None
+ self.plots = {}
+ self.expaliner = None
+ self.plots_explainer_html = None
+ self.trees = []
+ for key, value in kwargs.items():
+ setattr(self, key, value)
+ self.setup_params = {}
+ self.test_file = test_file
+ self.test_data = None
+
+ LOG.info(f"Model kwargs: {self.__dict__}")
+
+ def load_data(self):
+ LOG.info(f"Loading data from {self.input_file}")
+ self.data = pd.read_csv(self.input_file, sep=None, engine='python')
+ self.data.columns = self.data.columns.str.replace('.', '_')
+
+ numeric_cols = self.data.select_dtypes(include=['number']).columns
+ non_numeric_cols = self.data.select_dtypes(exclude=['number']).columns
+
+ self.data[numeric_cols] = self.data[numeric_cols].apply(
+ pd.to_numeric, errors='coerce')
+
+ if len(non_numeric_cols) > 0:
+ LOG.info(f"Non-numeric columns found: {non_numeric_cols.tolist()}")
+
+ names = self.data.columns.to_list()
+ target_index = int(self.target_col)-1
+ self.target = names[target_index]
+ self.features_name = [name
+ for i, name in enumerate(names)
+ if i != target_index]
+ if hasattr(self, 'missing_value_strategy'):
+ if self.missing_value_strategy == 'mean':
+ self.data = self.data.fillna(
+ self.data.mean(numeric_only=True))
+ elif self.missing_value_strategy == 'median':
+ self.data = self.data.fillna(
+ self.data.median(numeric_only=True))
+ elif self.missing_value_strategy == 'drop':
+ self.data = self.data.dropna()
+ else:
+ # Default strategy if not specified
+ self.data = self.data.fillna(self.data.median(numeric_only=True))
+
+ if self.test_file:
+ LOG.info(f"Loading test data from {self.test_file}")
+ self.test_data = pd.read_csv(
+ self.test_file, sep=None, engine='python')
+ self.test_data = self.test_data[numeric_cols].apply(
+ pd.to_numeric, errors='coerce')
+ self.test_data.columns = self.test_data.columns.str.replace(
+ '.', '_'
+ )
+
+ def setup_pycaret(self):
+ LOG.info("Initializing PyCaret")
+ self.setup_params = {
+ 'target': self.target,
+ 'session_id': self.random_seed,
+ 'html': True,
+ 'log_experiment': False,
+ 'system_log': False,
+ 'index': False,
+ }
+
+ if self.test_data is not None:
+ self.setup_params['test_data'] = self.test_data
+
+ if hasattr(self, 'train_size') and self.train_size is not None \
+ and self.test_data is None:
+ self.setup_params['train_size'] = self.train_size
+
+ if hasattr(self, 'normalize') and self.normalize is not None:
+ self.setup_params['normalize'] = self.normalize
+
+ if hasattr(self, 'feature_selection') and \
+ self.feature_selection is not None:
+ self.setup_params['feature_selection'] = self.feature_selection
+
+ if hasattr(self, 'cross_validation') and \
+ self.cross_validation is not None \
+ and self.cross_validation is False:
+ self.setup_params['cross_validation'] = self.cross_validation
+
+ if hasattr(self, 'cross_validation') and \
+ self.cross_validation is not None:
+ if hasattr(self, 'cross_validation_folds'):
+ self.setup_params['fold'] = self.cross_validation_folds
+
+ if hasattr(self, 'remove_outliers') and \
+ self.remove_outliers is not None:
+ self.setup_params['remove_outliers'] = self.remove_outliers
+
+ if hasattr(self, 'remove_multicollinearity') and \
+ self.remove_multicollinearity is not None:
+ self.setup_params['remove_multicollinearity'] = \
+ self.remove_multicollinearity
+
+ if hasattr(self, 'polynomial_features') and \
+ self.polynomial_features is not None:
+ self.setup_params['polynomial_features'] = self.polynomial_features
+
+ if hasattr(self, 'fix_imbalance') and \
+ self.fix_imbalance is not None:
+ self.setup_params['fix_imbalance'] = self.fix_imbalance
+
+ LOG.info(self.setup_params)
+ self.exp.setup(self.data, **self.setup_params)
+
+ def train_model(self):
+ LOG.info("Training and selecting the best model")
+ if self.task_type == "classification":
+ average_displayed = "Weighted"
+ self.exp.add_metric(id=f'PR-AUC-{average_displayed}',
+ name=f'PR-AUC-{average_displayed}',
+ target='pred_proba',
+ score_func=average_precision_score,
+ average='weighted'
+ )
+
+ if hasattr(self, 'models') and self.models is not None:
+ self.best_model = self.exp.compare_models(
+ include=self.models)
+ else:
+ self.best_model = self.exp.compare_models()
+ self.results = self.exp.pull()
+ if self.task_type == "classification":
+ self.results.rename(columns={'AUC': 'ROC-AUC'}, inplace=True)
+
+ _ = self.exp.predict_model(self.best_model)
+ self.test_result_df = self.exp.pull()
+ if self.task_type == "classification":
+ self.test_result_df.rename(
+ columns={'AUC': 'ROC-AUC'}, inplace=True)
+
+ def save_model(self):
+ hdf5_model_path = "pycaret_model.h5"
+ with h5py.File(hdf5_model_path, 'w') as f:
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+ joblib.dump(self.best_model, temp_file.name)
+ temp_file.seek(0)
+ model_bytes = temp_file.read()
+ f.create_dataset('model', data=np.void(model_bytes))
+
+ def generate_plots(self):
+ raise NotImplementedError("Subclasses should implement this method")
+
+ def encode_image_to_base64(self, img_path):
+ with open(img_path, 'rb') as img_file:
+ return base64.b64encode(img_file.read()).decode('utf-8')
+
+ def save_html_report(self):
+ """Write the CSV result files and the combined HTML training report.
+
+ Files written into ``self.output_dir``: ``best_model.csv``,
+ ``comparison_results.csv``, ``test_results.csv`` and
+ ``comparison_result.html``. A FeatureImportanceAnalyzer is also run
+ on ``self.data`` and its HTML is placed in the report.
+
+ NOTE(review): in this copy of the patch the markup inside the string
+ templates below looks stripped (the templates hold no tags and one
+ literal is split over two lines) - compare with the repository
+ version before editing them.
+ """
+ LOG.info("Saving HTML report")
+
+ model_name = type(self.best_model).__name__
+ # Bookkeeping arguments and the raw test frame are left out of the
+ # parameter table.
+ excluded_params = ['html', 'log_experiment', 'system_log', 'test_data']
+ filtered_setup_params = {
+ k: v
+ for k, v in self.setup_params.items() if k not in excluded_params
+ }
+ setup_params_table = pd.DataFrame(
+ list(filtered_setup_params.items()),
+ columns=['Parameter', 'Value'])
+
+ # Hyper-parameters of the winning model, also saved as CSV.
+ best_model_params = pd.DataFrame(
+ self.best_model.get_params().items(),
+ columns=['Parameter', 'Value'])
+ best_model_params.to_csv(
+ os.path.join(self.output_dir, 'best_model.csv'),
+ index=False)
+ self.results.to_csv(os.path.join(
+ self.output_dir, "comparison_results.csv"))
+ self.test_result_df.to_csv(os.path.join(
+ self.output_dir, "test_results.csv"))
+
+ # One section per generated plot; every plot file is read and
+ # base64-encoded, and a separator is added between sections.
+ plots_html = ""
+ length = len(self.plots)
+ for i, (plot_name, plot_path) in enumerate(self.plots.items()):
+ encoded_image = self.encode_image_to_base64(plot_path)
+ plots_html += f"""
+
+
{plot_name.capitalize()}
+
+
+ """
+ if i < length - 1:
+ plots_html += "
"
+
+ # One section per non-empty entry collected by generate_tree_plots.
+ tree_plots = ""
+ for i, tree in enumerate(self.trees):
+ if tree:
+ tree_plots += f"""
+
+
Tree {i+1}
+
+
+ """
+
+ # The analyzer sets up its own experiment on the same data and
+ # returns an HTML fragment.
+ analyzer = FeatureImportanceAnalyzer(
+ data=self.data,
+ target_col=self.target_col,
+ task_type=self.task_type,
+ output_dir=self.output_dir)
+ feature_importance_html = analyzer.run()
+
+ html_content = f"""
+ {get_html_template()}
+ PyCaret Model Training Report
+
+
+ Setup & Best Model
+
+ Best Model Plots
+
+ Feature Importance
+
+ Explainer
+
+
+
+
Setup Parameters
+
+ Parameter | Value |
+ {setup_params_table.to_html(
+ index=False, header=False, classes='table')}
+
+
If you want to know all the experiment setup parameters,
+ please check the PyCaret documentation for
+ the classification/regression exp
function.
+
Best Model: {model_name}
+
+ Parameter | Value |
+ {best_model_params.to_html(
+ index=False, header=False, classes='table')}
+
+
Comparison Results on the Cross-Validation Set
+
+ {self.results.to_html(index=False, classes='table')}
+
+
Results on the Test Set for the best model
+
+ {self.test_result_df.to_html(index=False, classes='table')}
+
+
+
+
Best Model Plots on the testing set
+ {plots_html}
+
+
+ {feature_importance_html}
+
+
+ {self.plots_explainer_html}
+ {tree_plots}
+
+ {get_html_closing()}
+ """
+
+ with open(os.path.join(
+ self.output_dir, "comparison_result.html"), "w") as file:
+ file.write(html_content)
+
+ def save_dashboard(self):
+ raise NotImplementedError("Subclasses should implement this method")
+
+ def generate_plots_explainer(self):
+ raise NotImplementedError("Subclasses should implement this method")
+
+ # not working now
+ def generate_tree_plots(self):
+ from sklearn.ensemble import RandomForestClassifier, \
+ RandomForestRegressor
+ from xgboost import XGBClassifier, XGBRegressor
+ from explainerdashboard.explainers import RandomForestExplainer
+
+ LOG.info("Generating tree plots")
+ X_test = self.exp.X_test_transformed.copy()
+ y_test = self.exp.y_test_transformed
+
+ is_rf = isinstance(self.best_model, RandomForestClassifier) or \
+ isinstance(self.best_model, RandomForestRegressor)
+
+ is_xgb = isinstance(self.best_model, XGBClassifier) or \
+ isinstance(self.best_model, XGBRegressor)
+
+ try:
+ if is_rf:
+ num_trees = self.best_model.n_estimators
+ if is_xgb:
+ num_trees = len(self.best_model.get_booster().get_dump())
+ explainer = RandomForestExplainer(self.best_model, X_test, y_test)
+ for i in range(num_trees):
+ fig = explainer.decisiontree_encoded(tree_idx=i, index=0)
+ LOG.info(f"Tree {i+1}")
+ LOG.info(fig)
+ self.trees.append(fig)
+ except Exception as e:
+ LOG.error(f"Error generating tree plots: {e}")
+
+ def run(self):
+ self.load_data()
+ self.setup_pycaret()
+ self.train_model()
+ self.save_model()
+ self.generate_plots()
+ self.generate_plots_explainer()
+ self.generate_tree_plots()
+ self.save_html_report()
+ # self.save_dashboard()
diff -r 000000000000 -r 1f20fe57fdee dashboard.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dashboard.py Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,159 @@
+import logging
+from typing import Any, Dict, Optional
+
+from pycaret.utils.generic import get_label_encoder
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+def generate_classifier_explainer_dashboard(
+ exp,
+ estimator,
+ display_format: str = "dash",
+ dashboard_kwargs: Optional[Dict[str, Any]] = None,
+ run_kwargs: Optional[Dict[str, Any]] = None,
+ **kwargs,):
+
+ """
+ This function is changed from pycaret.classification.oop.dashboard()
+
+ This function generates the interactive dashboard for a trained model.
+ The dashboard is implemented using
+ ExplainerDashboard (explainerdashboard.readthedocs.io)
+
+
+ estimator: scikit-learn compatible object
+ Trained model object
+
+
+ display_format: str, default = 'dash'
+ Render mode for the dashboard. The default is set to ``dash``
+ which will
+ render a dashboard in browser. There are four possible options:
+
+ - 'dash' - displays the dashboard in browser
+ - 'inline' - displays the dashboard in the jupyter notebook cell.
+ - 'jupyterlab' - displays the dashboard in jupyterlab pane.
+ - 'external' - displays the dashboard in a separate tab.
+ (use in Colab)
+
+
+ dashboard_kwargs: dict, default = {} (empty dict)
+ Dictionary of arguments passed to the ``ExplainerDashboard`` class.
+
+
+ run_kwargs: dict, default = {} (empty dict)
+ Dictionary of arguments passed to the ``run``
+ method of ``ExplainerDashboard``.
+
+
+ **kwargs:
+ Additional keyword arguments to pass to the ``ClassifierExplainer``
+ or ``RegressionExplainer`` class.
+
+
+ Returns:
+ ExplainerDashboard
+ """
+
+ dashboard_kwargs = dashboard_kwargs or {}
+ run_kwargs = run_kwargs or {}
+
+ from explainerdashboard import ClassifierExplainer, ExplainerDashboard
+
+ le = get_label_encoder(exp.pipeline)
+ if le:
+ labels_ = list(le.classes_)
+ else:
+ labels_ = None
+
+ # Replaceing chars which dash doesnt accept for column name `.` , `{`, `}`
+
+ X_test_df = exp.X_test_transformed.copy()
+ LOG.info(X_test_df)
+ X_test_df.columns = [
+ col.replace(".", "__").replace("{", "__").replace("}", "__")
+ for col in X_test_df.columns
+ ]
+
+ explainer = ClassifierExplainer(
+ estimator, X_test_df, exp.y_test_transformed, labels=labels_, **kwargs
+ )
+ return ExplainerDashboard(
+ explainer, mode=display_format,
+ contributions=False, whatif=False,
+ **dashboard_kwargs
+ )
+
+
+def generate_regression_explainer_dashboard(
+ exp,
+ estimator,
+ display_format: str = "dash",
+ dashboard_kwargs: Optional[Dict[str, Any]] = None,
+ run_kwargs: Optional[Dict[str, Any]] = None,
+ **kwargs,):
+
+ """
+ This function is changed from pycaret.regression.oop.dashboard()
+
+ This function generates the interactive dashboard for a trained model.
+ The dashboard is implemented using ExplainerDashboard
+ (explainerdashboard.readthedocs.io)
+
+
+ estimator: scikit-learn compatible object
+ Trained model object
+
+
+ display_format: str, default = 'dash'
+ Render mode for the dashboard. The default is set to ``dash``
+ which will
+ render a dashboard in browser. There are four possible options:
+
+ - 'dash' - displays the dashboard in browser
+ - 'inline' - displays the dashboard in the jupyter notebook cell.
+ - 'jupyterlab' - displays the dashboard in jupyterlab pane.
+ - 'external' - displays the dashboard in a separate tab.
+ (use in Colab)
+
+
+ dashboard_kwargs: dict, default = {} (empty dict)
+ Dictionary of arguments passed to the ``ExplainerDashboard`` class.
+
+
+ run_kwargs: dict, default = {} (empty dict)
+ Dictionary of arguments passed to the ``run`` method
+ of ``ExplainerDashboard``.
+
+
+ **kwargs:
+ Additional keyword arguments to pass to the
+ ``ClassifierExplainer`` or
+ ``RegressionExplainer`` class.
+
+
+ Returns:
+ ExplainerDashboard
+ """
+
+ dashboard_kwargs = dashboard_kwargs or {}
+ run_kwargs = run_kwargs or {}
+
+ from explainerdashboard import ExplainerDashboard, RegressionExplainer
+
+ # Replaceing chars which dash doesnt accept for column name `.` , `{`, `}`
+ X_test_df = exp.X_test_transformed.copy()
+ X_test_df.columns = [
+ col.replace(".", "__").replace("{", "__").replace("}", "__")
+ for col in X_test_df.columns
+ ]
+ explainer = RegressionExplainer(
+ estimator, X_test_df, exp.y_test_transformed, **kwargs
+ )
+ return ExplainerDashboard(
+ explainer, mode=display_format, contributions=False,
+ whatif=False, shap_interaction=False, decision_trees=False,
+ **dashboard_kwargs
+ )
diff -r 000000000000 -r 1f20fe57fdee feature_importance.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/feature_importance.py Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,171 @@
+import base64
+import logging
+import os
+
+import matplotlib.pyplot as plt
+
+import pandas as pd
+
+from pycaret.classification import ClassificationExperiment
+from pycaret.regression import RegressionExperiment
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+class FeatureImportanceAnalyzer:
+ def __init__(
+ self,
+ task_type,
+ output_dir,
+ data_path=None,
+ data=None,
+ target_col=None):
+
+ if data is not None:
+ self.data = data
+ LOG.info("Data loaded from memory")
+ else:
+ self.target_col = target_col
+ self.data = pd.read_csv(data_path, sep=None, engine='python')
+ self.data.columns = self.data.columns.str.replace('.', '_')
+ self.data = self.data.fillna(self.data.median(numeric_only=True))
+ self.task_type = task_type
+ self.target = self.data.columns[int(target_col) - 1]
+ self.exp = ClassificationExperiment() \
+ if task_type == 'classification' \
+ else RegressionExperiment()
+ self.plots = {}
+ self.output_dir = output_dir
+
+ def setup_pycaret(self):
+ LOG.info("Initializing PyCaret")
+ setup_params = {
+ 'target': self.target,
+ 'session_id': 123,
+ 'html': True,
+ 'log_experiment': False,
+ 'system_log': False
+ }
+ LOG.info(self.task_type)
+ LOG.info(self.exp)
+ self.exp.setup(self.data, **setup_params)
+
+ # def save_coefficients(self):
+ # model = self.exp.create_model('lr')
+ # coef_df = pd.DataFrame({
+ # 'Feature': self.data.columns.drop(self.target),
+ # 'Coefficient': model.coef_[0]
+ # })
+ # coef_html = coef_df.to_html(index=False)
+ # return coef_html
+
+ def save_tree_importance(self):
+ model = self.exp.create_model('rf')
+ importances = model.feature_importances_
+ processed_features = self.exp.get_config('X_transformed').columns
+ LOG.debug(f"Feature importances: {importances}")
+ LOG.debug(f"Features: {processed_features}")
+ feature_importances = pd.DataFrame({
+ 'Feature': processed_features,
+ 'Importance': importances
+ }).sort_values(by='Importance', ascending=False)
+ plt.figure(figsize=(10, 6))
+ plt.barh(
+ feature_importances['Feature'],
+ feature_importances['Importance'])
+ plt.xlabel('Importance')
+ plt.title('Feature Importance (Random Forest)')
+ plot_path = os.path.join(
+ self.output_dir,
+ 'tree_importance.png')
+ plt.savefig(plot_path)
+ plt.close()
+ self.plots['tree_importance'] = plot_path
+
+ def save_shap_values(self):
+ model = self.exp.create_model('lightgbm')
+ import shap
+ explainer = shap.Explainer(model)
+ shap_values = explainer.shap_values(
+ self.exp.get_config('X_transformed'))
+ shap.summary_plot(shap_values,
+ self.exp.get_config('X_transformed'), show=False)
+ plt.title('Shap (LightGBM)')
+ plot_path = os.path.join(
+ self.output_dir, 'shap_summary.png')
+ plt.savefig(plot_path)
+ plt.close()
+ self.plots['shap_summary'] = plot_path
+
+ def generate_feature_importance(self):
+ # coef_html = self.save_coefficients()
+ self.save_tree_importance()
+ self.save_shap_values()
+
+ def encode_image_to_base64(self, img_path):
+ with open(img_path, 'rb') as img_file:
+ return base64.b64encode(img_file.read()).decode('utf-8')
+
+ def generate_html_report(self):
+ LOG.info("Generating HTML report")
+
+ # Read and encode plot images
+ plots_html = ""
+ for plot_name, plot_path in self.plots.items():
+ encoded_image = self.encode_image_to_base64(plot_path)
+ plots_html += f"""
+
+
{'Feature importance analysis from a'
+ 'trained Random Forest'
+ if plot_name == 'tree_importance'
+ else 'SHAP Summary from a trained lightgbm'}
+
{'Use gini impurity for'
+ 'calculating feature importance for classification'
+ 'and Variance Reduction for regression'
+ if plot_name == 'tree_importance'
+ else ''}
+
+
+ """
+
+ # Generate HTML content with tabs
+ html_content = f"""
+ PyCaret Feature Importance Report
+ {plots_html}
+ """
+
+ return html_content
+
+ def run(self):
+ LOG.info("Running feature importance analysis")
+ self.setup_pycaret()
+ self.generate_feature_importance()
+ html_content = self.generate_html_report()
+ LOG.info("Feature importance analysis completed")
+ return html_content
+
+
+if __name__ == "__main__":
+ import argparse
+ parser = argparse.ArgumentParser(description="Feature Importance Analysis")
+ parser.add_argument(
+ "--data_path", type=str, help="Path to the dataset")
+ parser.add_argument(
+ "--target_col", type=int,
+ help="Index of the target column (1-based)")
+ parser.add_argument(
+ "--task_type", type=str,
+ choices=["classification", "regression"],
+ help="Task type: classification or regression")
+ parser.add_argument(
+ "--output_dir",
+ type=str,
+ help="Directory to save the outputs")
+ args = parser.parse_args()
+
+ analyzer = FeatureImportanceAnalyzer(
+ args.data_path, args.target_col,
+ args.task_type, args.output_dir)
+ analyzer.run()
diff -r 000000000000 -r 1f20fe57fdee pycaret_classification.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_classification.py Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,204 @@
+import logging
+
+from base_model_trainer import BaseModelTrainer
+
+from dashboard import generate_classifier_explainer_dashboard
+
+from pycaret.classification import ClassificationExperiment
+
+from utils import add_hr_to_html, add_plot_to_html
+
+LOG = logging.getLogger(__name__)
+
+
+class ClassificationModelTrainer(BaseModelTrainer):
+ def __init__(
+ self,
+ input_file,
+ target_col,
+ output_dir,
+ task_type,
+ random_seed,
+ test_file=None,
+ **kwargs):
+ super().__init__(
+ input_file,
+ target_col,
+ output_dir,
+ task_type,
+ random_seed,
+ test_file,
+ **kwargs)
+ self.exp = ClassificationExperiment()
+
+ def save_dashboard(self):
+ LOG.info("Saving explainer dashboard")
+ dashboard = generate_classifier_explainer_dashboard(self.exp,
+ self.best_model)
+ dashboard.save_html("dashboard.html")
+
+ def generate_plots(self):
+ LOG.info("Generating and saving plots")
+ plots = ['confusion_matrix', 'auc', 'threshold', 'pr',
+ 'error', 'class_report', 'learning', 'calibration',
+ 'vc', 'dimension', 'manifold', 'rfe', 'feature',
+ 'feature_all']
+ for plot_name in plots:
+ try:
+ if plot_name == 'auc' and not self.exp.is_multiclass:
+ plot_path = self.exp.plot_model(self.best_model,
+ plot=plot_name,
+ save=True,
+ plot_kwargs={
+ 'micro': False,
+ 'macro': False,
+ 'per_class': False,
+ 'binary': True
+ }
+ )
+ self.plots[plot_name] = plot_path
+ continue
+
+ plot_path = self.exp.plot_model(self.best_model,
+ plot=plot_name, save=True)
+ self.plots[plot_name] = plot_path
+ except Exception as e:
+ LOG.error(f"Error generating plot {plot_name}: {e}")
+ continue
+
+ def generate_plots_explainer(self):
+ LOG.info("Generating and saving plots from explainer")
+
+ from explainerdashboard import ClassifierExplainer
+
+ X_test = self.exp.X_test_transformed.copy()
+ y_test = self.exp.y_test_transformed
+
+ explainer = ClassifierExplainer(self.best_model, X_test, y_test)
+ self.expaliner = explainer
+ plots_explainer_html = ""
+
+ try:
+ fig_importance = explainer.plot_importances()
+ plots_explainer_html += add_plot_to_html(fig_importance)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot importance(mean shap): {e}")
+
+ try:
+ fig_importance_perm = explainer.plot_importances(
+ kind="permutation")
+ plots_explainer_html += add_plot_to_html(fig_importance_perm)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot importance(permutation): {e}")
+
+ # try:
+ # fig_shap = explainer.plot_shap_summary()
+ # plots_explainer_html += add_plot_to_html(fig_shap,
+ # include_plotlyjs=False)
+ # except Exception as e:
+ # LOG.error(f"Error generating plot shap: {e}")
+
+ # try:
+ # fig_contributions = explainer.plot_contributions(
+ # index=0)
+ # plots_explainer_html += add_plot_to_html(
+ # fig_contributions, include_plotlyjs=False)
+ # except Exception as e:
+ # LOG.error(f"Error generating plot contributions: {e}")
+
+ # try:
+ # for feature in self.features_name:
+ # fig_dependence = explainer.plot_dependence(col=feature)
+ # plots_explainer_html += add_plot_to_html(fig_dependence)
+ # except Exception as e:
+ # LOG.error(f"Error generating plot dependencies: {e}")
+
+ try:
+ for feature in self.features_name:
+ fig_pdp = explainer.plot_pdp(feature)
+ plots_explainer_html += add_plot_to_html(fig_pdp)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot pdp: {e}")
+
+ try:
+ for feature in self.features_name:
+ fig_interaction = explainer.plot_interaction(
+ col=feature, interact_col=feature)
+ plots_explainer_html += add_plot_to_html(fig_interaction)
+ except Exception as e:
+ LOG.error(f"Error generating plot interactions: {e}")
+
+ try:
+ for feature in self.features_name:
+ fig_interactions_importance = \
+ explainer.plot_interactions_importance(
+ col=feature)
+ plots_explainer_html += add_plot_to_html(
+ fig_interactions_importance)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot interactions importance: {e}")
+
+ # try:
+ # for feature in self.features_name:
+ # fig_interactions_detailed = \
+ # explainer.plot_interactions_detailed(
+ # col=feature)
+ # plots_explainer_html += add_plot_to_html(
+ # fig_interactions_detailed)
+ # except Exception as e:
+ # LOG.error(f"Error generating plot interactions detailed: {e}")
+
+ try:
+ fig_precision = explainer.plot_precision()
+ plots_explainer_html += add_plot_to_html(fig_precision)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot precision: {e}")
+
+ try:
+ fig_cumulative_precision = explainer.plot_cumulative_precision()
+ plots_explainer_html += add_plot_to_html(fig_cumulative_precision)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot cumulative precision: {e}")
+
+ try:
+ fig_classification = explainer.plot_classification()
+ plots_explainer_html += add_plot_to_html(fig_classification)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot classification: {e}")
+
+ try:
+ fig_confusion_matrix = explainer.plot_confusion_matrix()
+ plots_explainer_html += add_plot_to_html(fig_confusion_matrix)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot confusion matrix: {e}")
+
+ try:
+ fig_lift_curve = explainer.plot_lift_curve()
+ plots_explainer_html += add_plot_to_html(fig_lift_curve)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot lift curve: {e}")
+
+ try:
+ fig_roc_auc = explainer.plot_roc_auc()
+ plots_explainer_html += add_plot_to_html(fig_roc_auc)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot roc auc: {e}")
+
+ try:
+ fig_pr_auc = explainer.plot_pr_auc()
+ plots_explainer_html += add_plot_to_html(fig_pr_auc)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot pr auc: {e}")
+
+ self.plots_explainer_html = plots_explainer_html
diff -r 000000000000 -r 1f20fe57fdee pycaret_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_macros.xml Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,25 @@
+
+ 3.3.2
+ 0
+ @PYCARET_VERSION@+@SUFFIX@
+ 21.05
+
+
+ quay.io/goeckslab/galaxy-pycaret:3.3.2
+
+
+
+
+ @Manual{PyCaret,
+ author = {Moez Ali},
+ title = {PyCaret: An open source, low-code machine learning library in Python},
+ year = {2020},
+ month = {April},
+ note = {PyCaret version 1.0.0},
+ url = {https://www.pycaret.org}
+}
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1f20fe57fdee pycaret_predict.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_predict.py Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,200 @@
+import argparse
+import logging
+import tempfile
+
+import h5py
+
+import joblib
+
+import pandas as pd
+
+from pycaret.classification import ClassificationExperiment
+from pycaret.regression import RegressionExperiment
+
+from sklearn.metrics import average_precision_score
+
+from utils import encode_image_to_base64, get_html_closing, get_html_template
+
+LOG = logging.getLogger(__name__)
+
+
+class PyCaretModelEvaluator:
+ def __init__(self, model_path, task, target):
+ self.model_path = model_path
+ self.task = task.lower()
+ self.model = self.load_h5_model()
+ self.target = target if target != "None" else None
+
+ def load_h5_model(self):
+ """Load a PyCaret model from an HDF5 file."""
+ with h5py.File(self.model_path, 'r') as f:
+ model_bytes = bytes(f['model'][()])
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+ temp_file.write(model_bytes)
+ temp_file.seek(0)
+ loaded_model = joblib.load(temp_file.name)
+ return loaded_model
+
+ def evaluate(self, data_path):
+ """Evaluate the model using the specified data."""
+ raise NotImplementedError("Subclasses must implement this method")
+
+
+class ClassificationEvaluator(PyCaretModelEvaluator):
+ def evaluate(self, data_path):
+ metrics = None
+ plot_paths = {}
+ data = pd.read_csv(data_path, engine='python', sep=None)
+ if self.target:
+ exp = ClassificationExperiment()
+ names = data.columns.to_list()
+ LOG.error(f"Column names: {names}")
+ target_index = int(self.target)-1
+ target_name = names[target_index]
+ exp.setup(data, target=target_name, test_data=data, index=False)
+ exp.add_metric(id='PR-AUC-Weighted',
+ name='PR-AUC-Weighted',
+ target='pred_proba',
+ score_func=average_precision_score,
+ average='weighted')
+ predictions = exp.predict_model(self.model)
+ metrics = exp.pull()
+ plots = ['confusion_matrix', 'auc', 'threshold', 'pr',
+ 'error', 'class_report', 'learning', 'calibration',
+ 'vc', 'dimension', 'manifold', 'rfe', 'feature',
+ 'feature_all']
+ for plot_name in plots:
+ try:
+ if plot_name == 'auc' and not exp.is_multiclass:
+ plot_path = exp.plot_model(self.model,
+ plot=plot_name,
+ save=True,
+ plot_kwargs={
+ 'micro': False,
+ 'macro': False,
+ 'per_class': False,
+ 'binary': True
+ })
+ plot_paths[plot_name] = plot_path
+ continue
+
+ plot_path = exp.plot_model(self.model,
+ plot=plot_name, save=True)
+ plot_paths[plot_name] = plot_path
+ except Exception as e:
+ LOG.error(f"Error generating plot {plot_name}: {e}")
+ continue
+ generate_html_report(plot_paths, metrics)
+
+ else:
+ exp = ClassificationExperiment()
+ exp.setup(data, target=None, test_data=data, index=False)
+ predictions = exp.predict_model(self.model, data=data)
+
+ return predictions, metrics, plot_paths
+
+
+class RegressionEvaluator(PyCaretModelEvaluator):
+ def evaluate(self, data_path):
+ metrics = None
+ plot_paths = {}
+ data = pd.read_csv(data_path, engine='python', sep=None)
+ if self.target:
+ names = data.columns.to_list()
+ target_index = int(self.target)-1
+ target_name = names[target_index]
+ exp = RegressionExperiment()
+ exp.setup(data, target=target_name, test_data=data, index=False)
+ predictions = exp.predict_model(self.model)
+ metrics = exp.pull()
+ plots = ['residuals', 'error', 'cooks',
+ 'learning', 'vc', 'manifold',
+ 'rfe', 'feature', 'feature_all']
+ for plot_name in plots:
+ try:
+ plot_path = exp.plot_model(self.model,
+ plot=plot_name, save=True)
+ plot_paths[plot_name] = plot_path
+ except Exception as e:
+ LOG.error(f"Error generating plot {plot_name}: {e}")
+ continue
+ generate_html_report(plot_paths, metrics)
+ else:
+ exp = RegressionExperiment()
+ exp.setup(data, target=None, test_data=data, index=False)
+ predictions = exp.predict_model(self.model, data=data)
+
+ return predictions, metrics, plot_paths
+
+
+def generate_html_report(plots, metrics):
+ """Generate an HTML evaluation report.
+
+ Args:
+ plots: Mapping of plot name to the path of the saved image; every
+ image is read and base64-encoded.
+ metrics: Table of scores; rendered with ``to_html``.
+
+ The report is written to ``evaluation_report.html`` in the current
+ working directory.
+
+ NOTE(review): the markup inside the templates below looks stripped in
+ this copy of the patch - compare with the repository version before
+ editing them.
+ """
+ plots_html = ""
+ for plot_name, plot_path in plots.items():
+ encoded_image = encode_image_to_base64(plot_path)
+ plots_html += f"""
+
+
{plot_name.capitalize()}
+
+
+
+ """
+
+ metrics_html = metrics.to_html(index=False, classes="table")
+
+ html_content = f"""
+ {get_html_template()}
+ Model Evaluation Report
+
+
+
+
Plots
+ {plots_html}
+
+ {get_html_closing()}
+ """
+
+ # Save HTML report
+ with open("evaluation_report.html", "w") as html_file:
+ html_file.write(html_content)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Evaluate a PyCaret model stored in HDF5 format.")
+ parser.add_argument("--model_path",
+ type=str,
+ help="Path to the HDF5 model file.")
+ parser.add_argument("--data_path",
+ type=str,
+ help="Path to the evaluation data CSV file.")
+ parser.add_argument("--task",
+ type=str,
+ choices=["classification", "regression"],
+ help="Specify the task: classification or regression.")
+ parser.add_argument("--target",
+ default=None,
+ help="Column number of the target")
+ args = parser.parse_args()
+
+ if args.task == "classification":
+ evaluator = ClassificationEvaluator(
+ args.model_path, args.task, args.target)
+ elif args.task == "regression":
+ evaluator = RegressionEvaluator(
+ args.model_path, args.task, args.target)
+ else:
+ raise ValueError(
+ "Unsupported task type. Use 'classification' or 'regression'.")
+
+ predictions, metrics, plots = evaluator.evaluate(args.data_path)
+
+ predictions.to_csv("predictions.csv", index=False)
diff -r 000000000000 -r 1f20fe57fdee pycaret_predict.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_predict.xml Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,61 @@
+
+ predicts/evaluates your pycaret ML model on a dataset.
+
+ pycaret_macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ target_feature
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This tool uses PyCaret to evaluate a machine learning model or to make predictions with it.
+
+ **Outputs**:
+
+ - **prediction**: The prediction results on the dataset in CSV format.
+
+ - **report**: The evaluation report in HTML format, generated if you upload a dataset
+   with a target column and select that column in the target_feature input field.
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1f20fe57fdee pycaret_regression.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_regression.py Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,134 @@
+import logging
+
+from base_model_trainer import BaseModelTrainer
+
+from dashboard import generate_regression_explainer_dashboard
+
+from pycaret.regression import RegressionExperiment
+
+from utils import add_hr_to_html, add_plot_to_html
+
+LOG = logging.getLogger(__name__)
+
+
+class RegressionModelTrainer(BaseModelTrainer):
+ def __init__(
+ self,
+ input_file,
+ target_col,
+ output_dir,
+ task_type,
+ random_seed,
+ test_file=None,
+ **kwargs):
+ super().__init__(
+ input_file,
+ target_col,
+ output_dir,
+ task_type,
+ random_seed,
+ test_file,
+ **kwargs)
+ self.exp = RegressionExperiment()
+
+ def save_dashboard(self):
+ LOG.info("Saving explainer dashboard")
+ dashboard = generate_regression_explainer_dashboard(self.exp,
+ self.best_model)
+ dashboard.save_html("dashboard.html")
+
+ def generate_plots(self):
+ LOG.info("Generating and saving plots")
+ plots = ['residuals', 'error', 'cooks',
+ 'learning', 'vc', 'manifold',
+ 'rfe', 'feature', 'feature_all']
+ for plot_name in plots:
+ try:
+ plot_path = self.exp.plot_model(self.best_model,
+ plot=plot_name, save=True)
+ self.plots[plot_name] = plot_path
+ except Exception as e:
+ LOG.error(f"Error generating plot {plot_name}: {e}")
+ continue
+
+ def generate_plots_explainer(self):
+ LOG.info("Generating and saving plots from explainer")
+
+ from explainerdashboard import RegressionExplainer
+
+ X_test = self.exp.X_test_transformed.copy()
+ y_test = self.exp.y_test_transformed
+
+ explainer = RegressionExplainer(self.best_model, X_test, y_test)
+ self.expaliner = explainer
+ plots_explainer_html = ""
+
+ try:
+ fig_importance = explainer.plot_importances()
+ plots_explainer_html += add_plot_to_html(fig_importance)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot importance: {e}")
+
+ try:
+ fig_importance_permutation = \
+ explainer.plot_importances_permutation(
+ kind="permutation")
+ plots_explainer_html += add_plot_to_html(
+ fig_importance_permutation)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot importance permutation: {e}")
+
+ try:
+ for feature in self.features_name:
+ fig_shap = explainer.plot_pdp(feature)
+ plots_explainer_html += add_plot_to_html(fig_shap)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot shap dependence: {e}")
+
+ # try:
+ # for feature in self.features_name:
+ # fig_interaction = explainer.plot_interaction(col=feature)
+ # plots_explainer_html += add_plot_to_html(fig_interaction)
+ # except Exception as e:
+ # LOG.error(f"Error generating plot shap interaction: {e}")
+
+ try:
+ for feature in self.features_name:
+ fig_interactions_importance = \
+ explainer.plot_interactions_importance(
+ col=feature)
+ plots_explainer_html += add_plot_to_html(
+ fig_interactions_importance)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot shap summary: {e}")
+
+ # Regression specific plots
+ try:
+ fig_pred_actual = explainer.plot_predicted_vs_actual()
+ plots_explainer_html += add_plot_to_html(fig_pred_actual)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot prediction vs actual: {e}")
+
+ try:
+ fig_residuals = explainer.plot_residuals()
+ plots_explainer_html += add_plot_to_html(fig_residuals)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot residuals: {e}")
+
+ try:
+ for feature in self.features_name:
+ fig_residuals_vs_feature = \
+ explainer.plot_residuals_vs_feature(feature)
+ plots_explainer_html += add_plot_to_html(
+ fig_residuals_vs_feature)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot residuals vs feature: {e}")
+
+ self.plots_explainer_html = plots_explainer_html
diff -r 000000000000 -r 1f20fe57fdee pycaret_train.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_train.py Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,117 @@
+import argparse
+import logging
+
+from pycaret_classification import ClassificationModelTrainer
+
+from pycaret_regression import RegressionModelTrainer
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--input_file", help="Path to the input file")
+ parser.add_argument("--target_col", help="Column number of the target")
+ parser.add_argument("--output_dir",
+ help="Path to the output directory")
+ parser.add_argument("--model_type",
+ choices=["classification", "regression"],
+ help="Type of the model")
+ parser.add_argument("--train_size", type=float,
+ default=None,
+ help="Train size for PyCaret setup")
+ parser.add_argument("--normalize", action="store_true",
+ default=None,
+ help="Normalize data for PyCaret setup")
+ parser.add_argument("--feature_selection", action="store_true",
+ default=None,
+ help="Perform feature selection for PyCaret setup")
+ parser.add_argument("--cross_validation", action="store_true",
+ default=None,
+ help="Perform cross-validation for PyCaret setup")
+ parser.add_argument("--cross_validation_folds", type=int,
+ default=None,
+ help="Number of cross-validation folds \
+ for PyCaret setup")
+ parser.add_argument("--remove_outliers", action="store_true",
+ default=None,
+ help="Remove outliers for PyCaret setup")
+ parser.add_argument("--remove_multicollinearity", action="store_true",
+ default=None,
+ help="Remove multicollinearity for PyCaret setup")
+ parser.add_argument("--polynomial_features", action="store_true",
+ default=None,
+ help="Generate polynomial features for PyCaret setup")
+ parser.add_argument("--feature_interaction", action="store_true",
+ default=None,
+ help="Generate feature interactions for PyCaret setup")
+ parser.add_argument("--feature_ratio", action="store_true",
+ default=None,
+ help="Generate feature ratios for PyCaret setup")
+ parser.add_argument("--fix_imbalance", action="store_true",
+ default=None,
+ help="Fix class imbalance for PyCaret setup")
+ parser.add_argument("--models", nargs='+',
+ default=None,
+ help="Selected models for training")
+ parser.add_argument("--random_seed", type=int,
+ default=42,
+ help="Random seed for PyCaret setup")
+ parser.add_argument("--test_file", type=str, default=None,
+ help="Path to the test data file")
+
+ args = parser.parse_args()
+
+ model_kwargs = {
+ "train_size": args.train_size,
+ "normalize": args.normalize,
+ "feature_selection": args.feature_selection,
+ "cross_validation": args.cross_validation,
+ "cross_validation_folds": args.cross_validation_folds,
+ "remove_outliers": args.remove_outliers,
+ "remove_multicollinearity": args.remove_multicollinearity,
+ "polynomial_features": args.polynomial_features,
+ "feature_interaction": args.feature_interaction,
+ "feature_ratio": args.feature_ratio,
+ "fix_imbalance": args.fix_imbalance,
+ }
+ LOG.info(f"Model kwargs: {model_kwargs}")
+
+ # Remove None values from model_kwargs
+
+ LOG.info(f"Model kwargs 2: {model_kwargs}")
+ if args.models:
+ model_kwargs["models"] = args.models[0].split(",")
+
+ model_kwargs = {k: v for k, v in model_kwargs.items() if v is not None}
+
+ if args.model_type == "classification":
+ trainer = ClassificationModelTrainer(
+ args.input_file,
+ args.target_col,
+ args.output_dir,
+ args.model_type,
+ args.random_seed,
+ args.test_file,
+ **model_kwargs)
+ elif args.model_type == "regression":
+ if "fix_imbalance" in model_kwargs:
+ del model_kwargs["fix_imbalance"]
+ trainer = RegressionModelTrainer(
+ args.input_file,
+ args.target_col,
+ args.output_dir,
+ args.model_type,
+ args.random_seed,
+ args.test_file,
+ **model_kwargs)
+ else:
+ LOG.error("Invalid model type. Please choose \
+ 'classification' or 'regression'.")
+ return
+ trainer.run()
+
+
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r 1f20fe57fdee test-data/auto-mpg.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/auto-mpg.tsv Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,399 @@
+MPG Cylinders Displacement Horsepower Weight Acceleration ModelYear Origin
+18.0 8 307.0 130.0 3504. 12.0 70 1
+15.0 8 350.0 165.0 3693. 11.5 70 1
+18.0 8 318.0 150.0 3436. 11.0 70 1
+16.0 8 304.0 150.0 3433. 12.0 70 1
+17.0 8 302.0 140.0 3449. 10.5 70 1
+15.0 8 429.0 198.0 4341. 10.0 70 1
+14.0 8 454.0 220.0 4354. 9.0 70 1
+14.0 8 440.0 215.0 4312. 8.5 70 1
+14.0 8 455.0 225.0 4425. 10.0 70 1
+15.0 8 390.0 190.0 3850. 8.5 70 1
+15.0 8 383.0 170.0 3563. 10.0 70 1
+14.0 8 340.0 160.0 3609. 8.0 70 1
+15.0 8 400.0 150.0 3761. 9.5 70 1
+14.0 8 455.0 225.0 3086. 10.0 70 1
+24.0 4 113.0 95.00 2372. 15.0 70 3
+22.0 6 198.0 95.00 2833. 15.5 70 1
+18.0 6 199.0 97.00 2774. 15.5 70 1
+21.0 6 200.0 85.00 2587. 16.0 70 1
+27.0 4 97.00 88.00 2130. 14.5 70 3
+26.0 4 97.00 46.00 1835. 20.5 70 2
+25.0 4 110.0 87.00 2672. 17.5 70 2
+24.0 4 107.0 90.00 2430. 14.5 70 2
+25.0 4 104.0 95.00 2375. 17.5 70 2
+26.0 4 121.0 113.0 2234. 12.5 70 2
+21.0 6 199.0 90.00 2648. 15.0 70 1
+10.0 8 360.0 215.0 4615. 14.0 70 1
+10.0 8 307.0 200.0 4376. 15.0 70 1
+11.0 8 318.0 210.0 4382. 13.5 70 1
+9.0 8 304.0 193.0 4732. 18.5 70 1
+27.0 4 97.00 88.00 2130. 14.5 71 3
+28.0 4 140.0 90.00 2264. 15.5 71 1
+25.0 4 113.0 95.00 2228. 14.0 71 3
+25.0 4 98.00 ? 2046. 19.0 71 1
+19.0 6 232.0 100.0 2634. 13.0 71 1
+16.0 6 225.0 105.0 3439. 15.5 71 1
+17.0 6 250.0 100.0 3329. 15.5 71 1
+19.0 6 250.0 88.00 3302. 15.5 71 1
+18.0 6 232.0 100.0 3288. 15.5 71 1
+14.0 8 350.0 165.0 4209. 12.0 71 1
+14.0 8 400.0 175.0 4464. 11.5 71 1
+14.0 8 351.0 153.0 4154. 13.5 71 1
+14.0 8 318.0 150.0 4096. 13.0 71 1
+12.0 8 383.0 180.0 4955. 11.5 71 1
+13.0 8 400.0 170.0 4746. 12.0 71 1
+13.0 8 400.0 175.0 5140. 12.0 71 1
+18.0 6 258.0 110.0 2962. 13.5 71 1
+22.0 4 140.0 72.00 2408. 19.0 71 1
+19.0 6 250.0 100.0 3282. 15.0 71 1
+18.0 6 250.0 88.00 3139. 14.5 71 1
+23.0 4 122.0 86.00 2220. 14.0 71 1
+28.0 4 116.0 90.00 2123. 14.0 71 2
+30.0 4 79.00 70.00 2074. 19.5 71 2
+30.0 4 88.00 76.00 2065. 14.5 71 2
+31.0 4 71.00 65.00 1773. 19.0 71 3
+35.0 4 72.00 69.00 1613. 18.0 71 3
+27.0 4 97.00 60.00 1834. 19.0 71 2
+26.0 4 91.00 70.00 1955. 20.5 71 1
+24.0 4 113.0 95.00 2278. 15.5 72 3
+25.0 4 97.50 80.00 2126. 17.0 72 1
+23.0 4 97.00 54.00 2254. 23.5 72 2
+20.0 4 140.0 90.00 2408. 19.5 72 1
+21.0 4 122.0 86.00 2226. 16.5 72 1
+13.0 8 350.0 165.0 4274. 12.0 72 1
+14.0 8 400.0 175.0 4385. 12.0 72 1
+15.0 8 318.0 150.0 4135. 13.5 72 1
+14.0 8 351.0 153.0 4129. 13.0 72 1
+17.0 8 304.0 150.0 3672. 11.5 72 1
+11.0 8 429.0 208.0 4633. 11.0 72 1
+13.0 8 350.0 155.0 4502. 13.5 72 1
+12.0 8 350.0 160.0 4456. 13.5 72 1
+13.0 8 400.0 190.0 4422. 12.5 72 1
+19.0 3 70.00 97.00 2330. 13.5 72 3
+15.0 8 304.0 150.0 3892. 12.5 72 1
+13.0 8 307.0 130.0 4098. 14.0 72 1
+13.0 8 302.0 140.0 4294. 16.0 72 1
+14.0 8 318.0 150.0 4077. 14.0 72 1
+18.0 4 121.0 112.0 2933. 14.5 72 2
+22.0 4 121.0 76.00 2511. 18.0 72 2
+21.0 4 120.0 87.00 2979. 19.5 72 2
+26.0 4 96.00 69.00 2189. 18.0 72 2
+22.0 4 122.0 86.00 2395. 16.0 72 1
+28.0 4 97.00 92.00 2288. 17.0 72 3
+23.0 4 120.0 97.00 2506. 14.5 72 3
+28.0 4 98.00 80.00 2164. 15.0 72 1
+27.0 4 97.00 88.00 2100. 16.5 72 3
+13.0 8 350.0 175.0 4100. 13.0 73 1
+14.0 8 304.0 150.0 3672. 11.5 73 1
+13.0 8 350.0 145.0 3988. 13.0 73 1
+14.0 8 302.0 137.0 4042. 14.5 73 1
+15.0 8 318.0 150.0 3777. 12.5 73 1
+12.0 8 429.0 198.0 4952. 11.5 73 1
+13.0 8 400.0 150.0 4464. 12.0 73 1
+13.0 8 351.0 158.0 4363. 13.0 73 1
+14.0 8 318.0 150.0 4237. 14.5 73 1
+13.0 8 440.0 215.0 4735. 11.0 73 1
+12.0 8 455.0 225.0 4951. 11.0 73 1
+13.0 8 360.0 175.0 3821. 11.0 73 1
+18.0 6 225.0 105.0 3121. 16.5 73 1
+16.0 6 250.0 100.0 3278. 18.0 73 1
+18.0 6 232.0 100.0 2945. 16.0 73 1
+18.0 6 250.0 88.00 3021. 16.5 73 1
+23.0 6 198.0 95.00 2904. 16.0 73 1
+26.0 4 97.00 46.00 1950. 21.0 73 2
+11.0 8 400.0 150.0 4997. 14.0 73 1
+12.0 8 400.0 167.0 4906. 12.5 73 1
+13.0 8 360.0 170.0 4654. 13.0 73 1
+12.0 8 350.0 180.0 4499. 12.5 73 1
+18.0 6 232.0 100.0 2789. 15.0 73 1
+20.0 4 97.00 88.00 2279. 19.0 73 3
+21.0 4 140.0 72.00 2401. 19.5 73 1
+22.0 4 108.0 94.00 2379. 16.5 73 3
+18.0 3 70.00 90.00 2124. 13.5 73 3
+19.0 4 122.0 85.00 2310. 18.5 73 1
+21.0 6 155.0 107.0 2472. 14.0 73 1
+26.0 4 98.00 90.00 2265. 15.5 73 2
+15.0 8 350.0 145.0 4082. 13.0 73 1
+16.0 8 400.0 230.0 4278. 9.50 73 1
+29.0 4 68.00 49.00 1867. 19.5 73 2
+24.0 4 116.0 75.00 2158. 15.5 73 2
+20.0 4 114.0 91.00 2582. 14.0 73 2
+19.0 4 121.0 112.0 2868. 15.5 73 2
+15.0 8 318.0 150.0 3399. 11.0 73 1
+24.0 4 121.0 110.0 2660. 14.0 73 2
+20.0 6 156.0 122.0 2807. 13.5 73 3
+11.0 8 350.0 180.0 3664. 11.0 73 1
+20.0 6 198.0 95.00 3102. 16.5 74 1
+21.0 6 200.0 ? 2875. 17.0 74 1
+19.0 6 232.0 100.0 2901. 16.0 74 1
+15.0 6 250.0 100.0 3336. 17.0 74 1
+31.0 4 79.00 67.00 1950. 19.0 74 3
+26.0 4 122.0 80.00 2451. 16.5 74 1
+32.0 4 71.00 65.00 1836. 21.0 74 3
+25.0 4 140.0 75.00 2542. 17.0 74 1
+16.0 6 250.0 100.0 3781. 17.0 74 1
+16.0 6 258.0 110.0 3632. 18.0 74 1
+18.0 6 225.0 105.0 3613. 16.5 74 1
+16.0 8 302.0 140.0 4141. 14.0 74 1
+13.0 8 350.0 150.0 4699. 14.5 74 1
+14.0 8 318.0 150.0 4457. 13.5 74 1
+14.0 8 302.0 140.0 4638. 16.0 74 1
+14.0 8 304.0 150.0 4257. 15.5 74 1
+29.0 4 98.00 83.00 2219. 16.5 74 2
+26.0 4 79.00 67.00 1963. 15.5 74 2
+26.0 4 97.00 78.00 2300. 14.5 74 2
+31.0 4 76.00 52.00 1649. 16.5 74 3
+32.0 4 83.00 61.00 2003. 19.0 74 3
+28.0 4 90.00 75.00 2125. 14.5 74 1
+24.0 4 90.00 75.00 2108. 15.5 74 2
+26.0 4 116.0 75.00 2246. 14.0 74 2
+24.0 4 120.0 97.00 2489. 15.0 74 3
+26.0 4 108.0 93.00 2391. 15.5 74 3
+31.0 4 79.00 67.00 2000. 16.0 74 2
+19.0 6 225.0 95.00 3264. 16.0 75 1
+18.0 6 250.0 105.0 3459. 16.0 75 1
+15.0 6 250.0 72.00 3432. 21.0 75 1
+15.0 6 250.0 72.00 3158. 19.5 75 1
+16.0 8 400.0 170.0 4668. 11.5 75 1
+15.0 8 350.0 145.0 4440. 14.0 75 1
+16.0 8 318.0 150.0 4498. 14.5 75 1
+14.0 8 351.0 148.0 4657. 13.5 75 1
+17.0 6 231.0 110.0 3907. 21.0 75 1
+16.0 6 250.0 105.0 3897. 18.5 75 1
+15.0 6 258.0 110.0 3730. 19.0 75 1
+18.0 6 225.0 95.00 3785. 19.0 75 1
+21.0 6 231.0 110.0 3039. 15.0 75 1
+20.0 8 262.0 110.0 3221. 13.5 75 1
+13.0 8 302.0 129.0 3169. 12.0 75 1
+29.0 4 97.00 75.00 2171. 16.0 75 3
+23.0 4 140.0 83.00 2639. 17.0 75 1
+20.0 6 232.0 100.0 2914. 16.0 75 1
+23.0 4 140.0 78.00 2592. 18.5 75 1
+24.0 4 134.0 96.00 2702. 13.5 75 3
+25.0 4 90.00 71.00 2223. 16.5 75 2
+24.0 4 119.0 97.00 2545. 17.0 75 3
+18.0 6 171.0 97.00 2984. 14.5 75 1
+29.0 4 90.00 70.00 1937. 14.0 75 2
+19.0 6 232.0 90.00 3211. 17.0 75 1
+23.0 4 115.0 95.00 2694. 15.0 75 2
+23.0 4 120.0 88.00 2957. 17.0 75 2
+22.0 4 121.0 98.00 2945. 14.5 75 2
+25.0 4 121.0 115.0 2671. 13.5 75 2
+33.0 4 91.00 53.00 1795. 17.5 75 3
+28.0 4 107.0 86.00 2464. 15.5 76 2
+25.0 4 116.0 81.00 2220. 16.9 76 2
+25.0 4 140.0 92.00 2572. 14.9 76 1
+26.0 4 98.00 79.00 2255. 17.7 76 1
+27.0 4 101.0 83.00 2202. 15.3 76 2
+17.5 8 305.0 140.0 4215. 13.0 76 1
+16.0 8 318.0 150.0 4190. 13.0 76 1
+15.5 8 304.0 120.0 3962. 13.9 76 1
+14.5 8 351.0 152.0 4215. 12.8 76 1
+22.0 6 225.0 100.0 3233. 15.4 76 1
+22.0 6 250.0 105.0 3353. 14.5 76 1
+24.0 6 200.0 81.00 3012. 17.6 76 1
+22.5 6 232.0 90.00 3085. 17.6 76 1
+29.0 4 85.00 52.00 2035. 22.2 76 1
+24.5 4 98.00 60.00 2164. 22.1 76 1
+29.0 4 90.00 70.00 1937. 14.2 76 2
+33.0 4 91.00 53.00 1795. 17.4 76 3
+20.0 6 225.0 100.0 3651. 17.7 76 1
+18.0 6 250.0 78.00 3574. 21.0 76 1
+18.5 6 250.0 110.0 3645. 16.2 76 1
+17.5 6 258.0 95.00 3193. 17.8 76 1
+29.5 4 97.00 71.00 1825. 12.2 76 2
+32.0 4 85.00 70.00 1990. 17.0 76 3
+28.0 4 97.00 75.00 2155. 16.4 76 3
+26.5 4 140.0 72.00 2565. 13.6 76 1
+20.0 4 130.0 102.0 3150. 15.7 76 2
+13.0 8 318.0 150.0 3940. 13.2 76 1
+19.0 4 120.0 88.00 3270. 21.9 76 2
+19.0 6 156.0 108.0 2930. 15.5 76 3
+16.5 6 168.0 120.0 3820. 16.7 76 2
+16.5 8 350.0 180.0 4380. 12.1 76 1
+13.0 8 350.0 145.0 4055. 12.0 76 1
+13.0 8 302.0 130.0 3870. 15.0 76 1
+13.0 8 318.0 150.0 3755. 14.0 76 1
+31.5 4 98.00 68.00 2045. 18.5 77 3
+30.0 4 111.0 80.00 2155. 14.8 77 1
+36.0 4 79.00 58.00 1825. 18.6 77 2
+25.5 4 122.0 96.00 2300. 15.5 77 1
+33.5 4 85.00 70.00 1945. 16.8 77 3
+17.5 8 305.0 145.0 3880. 12.5 77 1
+17.0 8 260.0 110.0 4060. 19.0 77 1
+15.5 8 318.0 145.0 4140. 13.7 77 1
+15.0 8 302.0 130.0 4295. 14.9 77 1
+17.5 6 250.0 110.0 3520. 16.4 77 1
+20.5 6 231.0 105.0 3425. 16.9 77 1
+19.0 6 225.0 100.0 3630. 17.7 77 1
+18.5 6 250.0 98.00 3525. 19.0 77 1
+16.0 8 400.0 180.0 4220. 11.1 77 1
+15.5 8 350.0 170.0 4165. 11.4 77 1
+15.5 8 400.0 190.0 4325. 12.2 77 1
+16.0 8 351.0 149.0 4335. 14.5 77 1
+29.0 4 97.00 78.00 1940. 14.5 77 2
+24.5 4 151.0 88.00 2740. 16.0 77 1
+26.0 4 97.00 75.00 2265. 18.2 77 3
+25.5 4 140.0 89.00 2755. 15.8 77 1
+30.5 4 98.00 63.00 2051. 17.0 77 1
+33.5 4 98.00 83.00 2075. 15.9 77 1
+30.0 4 97.00 67.00 1985. 16.4 77 3
+30.5 4 97.00 78.00 2190. 14.1 77 2
+22.0 6 146.0 97.00 2815. 14.5 77 3
+21.5 4 121.0 110.0 2600. 12.8 77 2
+21.5 3 80.00 110.0 2720. 13.5 77 3
+43.1 4 90.00 48.00 1985. 21.5 78 2
+36.1 4 98.00 66.00 1800. 14.4 78 1
+32.8 4 78.00 52.00 1985. 19.4 78 3
+39.4 4 85.00 70.00 2070. 18.6 78 3
+36.1 4 91.00 60.00 1800. 16.4 78 3
+19.9 8 260.0 110.0 3365. 15.5 78 1
+19.4 8 318.0 140.0 3735. 13.2 78 1
+20.2 8 302.0 139.0 3570. 12.8 78 1
+19.2 6 231.0 105.0 3535. 19.2 78 1
+20.5 6 200.0 95.00 3155. 18.2 78 1
+20.2 6 200.0 85.00 2965. 15.8 78 1
+25.1 4 140.0 88.00 2720. 15.4 78 1
+20.5 6 225.0 100.0 3430. 17.2 78 1
+19.4 6 232.0 90.00 3210. 17.2 78 1
+20.6 6 231.0 105.0 3380. 15.8 78 1
+20.8 6 200.0 85.00 3070. 16.7 78 1
+18.6 6 225.0 110.0 3620. 18.7 78 1
+18.1 6 258.0 120.0 3410. 15.1 78 1
+19.2 8 305.0 145.0 3425. 13.2 78 1
+17.7 6 231.0 165.0 3445. 13.4 78 1
+18.1 8 302.0 139.0 3205. 11.2 78 1
+17.5 8 318.0 140.0 4080. 13.7 78 1
+30.0 4 98.00 68.00 2155. 16.5 78 1
+27.5 4 134.0 95.00 2560. 14.2 78 3
+27.2 4 119.0 97.00 2300. 14.7 78 3
+30.9 4 105.0 75.00 2230. 14.5 78 1
+21.1 4 134.0 95.00 2515. 14.8 78 3
+23.2 4 156.0 105.0 2745. 16.7 78 1
+23.8 4 151.0 85.00 2855. 17.6 78 1
+23.9 4 119.0 97.00 2405. 14.9 78 3
+20.3 5 131.0 103.0 2830. 15.9 78 2
+17.0 6 163.0 125.0 3140. 13.6 78 2
+21.6 4 121.0 115.0 2795. 15.7 78 2
+16.2 6 163.0 133.0 3410. 15.8 78 2
+31.5 4 89.00 71.00 1990. 14.9 78 2
+29.5 4 98.00 68.00 2135. 16.6 78 3
+21.5 6 231.0 115.0 3245. 15.4 79 1
+19.8 6 200.0 85.00 2990. 18.2 79 1
+22.3 4 140.0 88.00 2890. 17.3 79 1
+20.2 6 232.0 90.00 3265. 18.2 79 1
+20.6 6 225.0 110.0 3360. 16.6 79 1
+17.0 8 305.0 130.0 3840. 15.4 79 1
+17.6 8 302.0 129.0 3725. 13.4 79 1
+16.5 8 351.0 138.0 3955. 13.2 79 1
+18.2 8 318.0 135.0 3830. 15.2 79 1
+16.9 8 350.0 155.0 4360. 14.9 79 1
+15.5 8 351.0 142.0 4054. 14.3 79 1
+19.2 8 267.0 125.0 3605. 15.0 79 1
+18.5 8 360.0 150.0 3940. 13.0 79 1
+31.9 4 89.00 71.00 1925. 14.0 79 2
+34.1 4 86.00 65.00 1975. 15.2 79 3
+35.7 4 98.00 80.00 1915. 14.4 79 1
+27.4 4 121.0 80.00 2670. 15.0 79 1
+25.4 5 183.0 77.00 3530. 20.1 79 2
+23.0 8 350.0 125.0 3900. 17.4 79 1
+27.2 4 141.0 71.00 3190. 24.8 79 2
+23.9 8 260.0 90.00 3420. 22.2 79 1
+34.2 4 105.0 70.00 2200. 13.2 79 1
+34.5 4 105.0 70.00 2150. 14.9 79 1
+31.8 4 85.00 65.00 2020. 19.2 79 3
+37.3 4 91.00 69.00 2130. 14.7 79 2
+28.4 4 151.0 90.00 2670. 16.0 79 1
+28.8 6 173.0 115.0 2595. 11.3 79 1
+26.8 6 173.0 115.0 2700. 12.9 79 1
+33.5 4 151.0 90.00 2556. 13.2 79 1
+41.5 4 98.00 76.00 2144. 14.7 80 2
+38.1 4 89.00 60.00 1968. 18.8 80 3
+32.1 4 98.00 70.00 2120. 15.5 80 1
+37.2 4 86.00 65.00 2019. 16.4 80 3
+28.0 4 151.0 90.00 2678. 16.5 80 1
+26.4 4 140.0 88.00 2870. 18.1 80 1
+24.3 4 151.0 90.00 3003. 20.1 80 1
+19.1 6 225.0 90.00 3381. 18.7 80 1
+34.3 4 97.00 78.00 2188. 15.8 80 2
+29.8 4 134.0 90.00 2711. 15.5 80 3
+31.3 4 120.0 75.00 2542. 17.5 80 3
+37.0 4 119.0 92.00 2434. 15.0 80 3
+32.2 4 108.0 75.00 2265. 15.2 80 3
+46.6 4 86.00 65.00 2110. 17.9 80 3
+27.9 4 156.0 105.0 2800. 14.4 80 1
+40.8 4 85.00 65.00 2110. 19.2 80 3
+44.3 4 90.00 48.00 2085. 21.7 80 2
+43.4 4 90.00 48.00 2335. 23.7 80 2
+36.4 5 121.0 67.00 2950. 19.9 80 2
+30.0 4 146.0 67.00 3250. 21.8 80 2
+44.6 4 91.00 67.00 1850. 13.8 80 3
+40.9 4 85.00 ? 1835. 17.3 80 2
+33.8 4 97.00 67.00 2145. 18.0 80 3
+29.8 4 89.00 62.00 1845. 15.3 80 2
+32.7 6 168.0 132.0 2910. 11.4 80 3
+23.7 3 70.00 100.0 2420. 12.5 80 3
+35.0 4 122.0 88.00 2500. 15.1 80 2
+23.6 4 140.0 ? 2905. 14.3 80 1
+32.4 4 107.0 72.00 2290. 17.0 80 3
+27.2 4 135.0 84.00 2490. 15.7 81 1
+26.6 4 151.0 84.00 2635. 16.4 81 1
+25.8 4 156.0 92.00 2620. 14.4 81 1
+23.5 6 173.0 110.0 2725. 12.6 81 1
+30.0 4 135.0 84.00 2385. 12.9 81 1
+39.1 4 79.00 58.00 1755. 16.9 81 3
+39.0 4 86.00 64.00 1875. 16.4 81 1
+35.1 4 81.00 60.00 1760. 16.1 81 3
+32.3 4 97.00 67.00 2065. 17.8 81 3
+37.0 4 85.00 65.00 1975. 19.4 81 3
+37.7 4 89.00 62.00 2050. 17.3 81 3
+34.1 4 91.00 68.00 1985. 16.0 81 3
+34.7 4 105.0 63.00 2215. 14.9 81 1
+34.4 4 98.00 65.00 2045. 16.2 81 1
+29.9 4 98.00 65.00 2380. 20.7 81 1
+33.0 4 105.0 74.00 2190. 14.2 81 2
+34.5 4 100.0 ? 2320. 15.8 81 2
+33.7 4 107.0 75.00 2210. 14.4 81 3
+32.4 4 108.0 75.00 2350. 16.8 81 3
+32.9 4 119.0 100.0 2615. 14.8 81 3
+31.6 4 120.0 74.00 2635. 18.3 81 3
+28.1 4 141.0 80.00 3230. 20.4 81 2
+30.7 6 145.0 76.00 3160. 19.6 81 2
+25.4 6 168.0 116.0 2900. 12.6 81 3
+24.2 6 146.0 120.0 2930. 13.8 81 3
+22.4 6 231.0 110.0 3415. 15.8 81 1
+26.6 8 350.0 105.0 3725. 19.0 81 1
+20.2 6 200.0 88.00 3060. 17.1 81 1
+17.6 6 225.0 85.00 3465. 16.6 81 1
+28.0 4 112.0 88.00 2605. 19.6 82 1
+27.0 4 112.0 88.00 2640. 18.6 82 1
+34.0 4 112.0 88.00 2395. 18.0 82 1
+31.0 4 112.0 85.00 2575. 16.2 82 1
+29.0 4 135.0 84.00 2525. 16.0 82 1
+27.0 4 151.0 90.00 2735. 18.0 82 1
+24.0 4 140.0 92.00 2865. 16.4 82 1
+23.0 4 151.0 ? 3035. 20.5 82 1
+36.0 4 105.0 74.00 1980. 15.3 82 2
+37.0 4 91.00 68.00 2025. 18.2 82 3
+31.0 4 91.00 68.00 1970. 17.6 82 3
+38.0 4 105.0 63.00 2125. 14.7 82 1
+36.0 4 98.00 70.00 2125. 17.3 82 1
+36.0 4 120.0 88.00 2160. 14.5 82 3
+36.0 4 107.0 75.00 2205. 14.5 82 3
+34.0 4 108.0 70.00 2245 16.9 82 3
+38.0 4 91.00 67.00 1965. 15.0 82 3
+32.0 4 91.00 67.00 1965. 15.7 82 3
+38.0 4 91.00 67.00 1995. 16.2 82 3
+25.0 6 181.0 110.0 2945. 16.4 82 1
+38.0 6 262.0 85.00 3015. 17.0 82 1
+26.0 4 156.0 92.00 2585. 14.5 82 1
+22.0 6 232.0 112.0 2835 14.7 82 1
+32.0 4 144.0 96.00 2665. 13.9 82 3
+36.0 4 135.0 84.00 2370. 13.0 82 1
+27.0 4 151.0 90.00 2950. 17.3 82 1
+27.0 4 140.0 86.00 2790. 15.6 82 1
+44.0 4 97.00 52.00 2130. 24.6 82 2
+32.0 4 135.0 84.00 2295. 11.6 82 1
+28.0 4 120.0 79.00 2625. 18.6 82 1
+31.0 4 119.0 82.00 2720. 19.4 82 1
diff -r 000000000000 -r 1f20fe57fdee test-data/evaluation_report_classification.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/evaluation_report_classification.html Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,236 @@
+
+
+
+
+ Model Training Report
+
+
+
+
+
+
Model Evaluation Report
+
+
+
Metrics
+
+
+
+
+ Model |
+ Accuracy |
+ AUC |
+ Recall |
+ Prec. |
+ F1 |
+ Kappa |
+ MCC |
+ PR-AUC-Weighted |
+
+
+
+
+ Light Gradient Boosting Machine |
+ 0.7826 |
+ 0.8162 |
+ 0.7419 |
+ 0.7667 |
+ 0.7541 |
+ 0.5594 |
+ 0.5596 |
+ 0.7753 |
+
+
+
+
+
+
+
Plots
+
+
+
Confusion_matrix
+
+
+
+
+
+
Auc
+
+
+
+
+
+
Threshold
+
+
+
+
+
+
Pr
+
+
+
+
+
+
Error
+
+
+
+
+
+
Class_report
+
+
+
+
+
+
Learning
+
+
+
+
+
+
Calibration
+
+
+
+
+
+
Vc
+
+
+
+
+
+
Dimension
+
+
+
+
+
+
Manifold
+
+
+
+
+
+
Rfe
+
+
+
+
+
+
Feature
+
+
+
+
+
+
Feature_all
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1f20fe57fdee test-data/evaluation_report_regression.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/evaluation_report_regression.html Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,202 @@
+
+
+
+
+ Model Training Report
+
+
+
+
+
+
Model Evaluation Report
+
+
+
Metrics
+
+
+
+
+ Model |
+ MAE |
+ MSE |
+ RMSE |
+ R2 |
+ RMSLE |
+ MAPE |
+
+
+
+
+ Gradient Boosting Regressor |
+ 1.6 |
+ 5.6214 |
+ 2.3709 |
+ 0.9077 |
+ 0.0875 |
+ 0.0691 |
+
+
+
+
+
+
+
Plots
+
+
+
Residuals
+
+
+
+
+
+
Error
+
+
+
+
+
+
Cooks
+
+
+
+
+
+
Learning
+
+
+
+
+
+
Vc
+
+
+
+
+
+
Manifold
+
+
+
+
+
+
Rfe
+
+
+
+
+
+
Feature
+
+
+
+
+
+
Feature_all
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1f20fe57fdee test-data/expected_best_model_classification.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_best_model_classification.csv Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,20 @@
+Parameter,Value
+boosting_type,gbdt
+class_weight,
+colsample_bytree,1.0
+importance_type,split
+learning_rate,0.1
+max_depth,-1
+min_child_samples,20
+min_child_weight,0.001
+min_split_gain,0.0
+n_estimators,100
+n_jobs,-1
+num_leaves,31
+objective,
+random_state,42
+reg_alpha,0.0
+reg_lambda,0.0
+subsample,1.0
+subsample_for_bin,200000
+subsample_freq,0
diff -r 000000000000 -r 1f20fe57fdee test-data/expected_best_model_classification_customized.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_best_model_classification_customized.csv Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,20 @@
+Parameter,Value
+boosting_type,gbdt
+class_weight,
+colsample_bytree,1.0
+importance_type,split
+learning_rate,0.1
+max_depth,-1
+min_child_samples,20
+min_child_weight,0.001
+min_split_gain,0.0
+n_estimators,100
+n_jobs,-1
+num_leaves,31
+objective,
+random_state,42
+reg_alpha,0.0
+reg_lambda,0.0
+subsample,1.0
+subsample_for_bin,200000
+subsample_freq,0
diff -r 000000000000 -r 1f20fe57fdee test-data/expected_best_model_regression.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_best_model_regression.csv Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,22 @@
+Parameter,Value
+alpha,0.9
+ccp_alpha,0.0
+criterion,friedman_mse
+init,
+learning_rate,0.1
+loss,squared_error
+max_depth,3
+max_features,
+max_leaf_nodes,
+min_impurity_decrease,0.0
+min_samples_leaf,1
+min_samples_split,2
+min_weight_fraction_leaf,0.0
+n_estimators,100
+n_iter_no_change,
+random_state,42
+subsample,1.0
+tol,0.0001
+validation_fraction,0.1
+verbose,0
+warm_start,False
diff -r 000000000000 -r 1f20fe57fdee test-data/expected_comparison_result_classification.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_comparison_result_classification.html Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,606 @@
+
+
+
+
+ Model Training Report
+
+
+
+
+
+
PyCaret Model Training Report
+
+
+ Setup & Best Model
+
+ Best Model Plots
+
+ Feature Importance
+
+ Explainer
+
+
+
+
Setup Parameters
+
+ Parameter | Value |
+
+
+
+ target |
+ PCR |
+
+
+ session_id |
+ 42 |
+
+
+ index |
+ False |
+
+
+
+
+
If you want to know all the experiment setup parameters,
+ please check the PyCaret documentation for
+ the classification/regression exp
function.
+
Best Model: LGBMClassifier
+
+ Parameter | Value |
+
+
+
+ boosting_type |
+ gbdt |
+
+
+ class_weight |
+ None |
+
+
+ colsample_bytree |
+ 1.0 |
+
+
+ importance_type |
+ split |
+
+
+ learning_rate |
+ 0.1 |
+
+
+ max_depth |
+ -1 |
+
+
+ min_child_samples |
+ 20 |
+
+
+ min_child_weight |
+ 0.001 |
+
+
+ min_split_gain |
+ 0.0 |
+
+
+ n_estimators |
+ 100 |
+
+
+ n_jobs |
+ -1 |
+
+
+ num_leaves |
+ 31 |
+
+
+ objective |
+ None |
+
+
+ random_state |
+ 42 |
+
+
+ reg_alpha |
+ 0.0 |
+
+
+ reg_lambda |
+ 0.0 |
+
+
+ subsample |
+ 1.0 |
+
+
+ subsample_for_bin |
+ 200000 |
+
+
+ subsample_freq |
+ 0 |
+
+
+
+
+
Comparison Results on the Cross-Validation Set
+
+
+
+
+ Model |
+ Accuracy |
+ ROC-AUC |
+ Recall |
+ Prec. |
+ F1 |
+ Kappa |
+ MCC |
+ PR-AUC-Weighted |
+ TT (Sec) |
+
+
+
+
+ Light Gradient Boosting Machine |
+ 0.715 |
+ 0.6000 |
+ 0.6500 |
+ 0.6917 |
+ 0.6357 |
+ 0.4380 |
+ 0.4748 |
+ 0.6822 |
+ 0.228 |
+
+
+ Logistic Regression |
+ 0.670 |
+ 0.6500 |
+ 0.7500 |
+ 0.6167 |
+ 0.6633 |
+ 0.3478 |
+ 0.3742 |
+ 0.7144 |
+ 0.331 |
+
+
+ Ridge Classifier |
+ 0.670 |
+ 0.6167 |
+ 0.7500 |
+ 0.6167 |
+ 0.6633 |
+ 0.3478 |
+ 0.3742 |
+ 0.0000 |
+ 0.180 |
+
+
+ Naive Bayes |
+ 0.650 |
+ 0.6333 |
+ 0.6833 |
+ 0.5917 |
+ 0.6257 |
+ 0.2969 |
+ 0.3112 |
+ 0.6978 |
+ 2.694 |
+
+
+ Quadratic Discriminant Analysis |
+ 0.610 |
+ 0.6333 |
+ 0.4667 |
+ 0.5333 |
+ 0.4733 |
+ 0.2256 |
+ 0.2488 |
+ 0.7033 |
+ 0.158 |
+
+
+ Linear Discriminant Analysis |
+ 0.605 |
+ 0.6000 |
+ 0.7000 |
+ 0.5900 |
+ 0.6105 |
+ 0.2372 |
+ 0.2577 |
+ 0.6594 |
+ 0.110 |
+
+
+ CatBoost Classifier |
+ 0.595 |
+ 0.6167 |
+ 0.6167 |
+ 0.5500 |
+ 0.5600 |
+ 0.2165 |
+ 0.2207 |
+ 0.6861 |
+ 12.075 |
+
+
+ Extra Trees Classifier |
+ 0.590 |
+ 0.6000 |
+ 0.5833 |
+ 0.5000 |
+ 0.5300 |
+ 0.2103 |
+ 0.2167 |
+ 0.6811 |
+ 0.775 |
+
+
+ SVM - Linear Kernel |
+ 0.585 |
+ 0.6500 |
+ 0.5333 |
+ 0.4667 |
+ 0.4521 |
+ 0.1429 |
+ 0.1690 |
+ 0.0000 |
+ 0.217 |
+
+
+ K Neighbors Classifier |
+ 0.565 |
+ 0.6292 |
+ 0.5000 |
+ 0.5750 |
+ 0.5057 |
+ 0.1413 |
+ 0.1469 |
+ 0.6717 |
+ 0.685 |
+
+
+ Random Forest Classifier |
+ 0.555 |
+ 0.5667 |
+ 0.5833 |
+ 0.5167 |
+ 0.5233 |
+ 0.1524 |
+ 0.1540 |
+ 0.6211 |
+ 0.847 |
+
+
+ Dummy Classifier |
+ 0.540 |
+ 0.5000 |
+ 0.0000 |
+ 0.0000 |
+ 0.0000 |
+ 0.0000 |
+ 0.0000 |
+ 0.4600 |
+ 0.165 |
+
+
+ Ada Boost Classifier |
+ 0.510 |
+ 0.4417 |
+ 0.5667 |
+ 0.4650 |
+ 0.4971 |
+ 0.0656 |
+ 0.0275 |
+ 0.5819 |
+ 0.645 |
+
+
+ Decision Tree Classifier |
+ 0.495 |
+ 0.5000 |
+ 0.4333 |
+ 0.4333 |
+ 0.4133 |
+ 0.0049 |
+ 0.0040 |
+ 0.5483 |
+ 0.329 |
+
+
+ Gradient Boosting Classifier |
+ 0.475 |
+ 0.4333 |
+ 0.4500 |
+ 0.4000 |
+ 0.4033 |
+ -0.0033 |
+ -0.0239 |
+ 0.5800 |
+ 0.643 |
+
+
+ Extreme Gradient Boosting |
+ 0.460 |
+ 0.4833 |
+ 0.4333 |
+ 0.3333 |
+ 0.3667 |
+ -0.0489 |
+ -0.0537 |
+ 0.6281 |
+ 0.422 |
+
+
+
+
+
Results on the Test Set for the best model
+
+
+
+
+ Model |
+ Accuracy |
+ ROC-AUC |
+ Recall |
+ Prec. |
+ F1 |
+ Kappa |
+ MCC |
+ PR-AUC-Weighted |
+
+
+
+
+ Light Gradient Boosting Machine |
+ 0.8095 |
+ 0.7454 |
+ 0.6667 |
+ 0.8571 |
+ 0.75 |
+ 0.6 |
+ 0.6124 |
+ 0.6799 |
+
+
+
+
+
+
+
Best Model Plots on the testing set
+
+
+
Confusion_matrix
+
+
+
+
+
Auc
+
+
+
+
+
Threshold
+
+
+
+
+
Pr
+
+
+
+
+
Error
+
+
+
+
+
Class_report
+
+
+
+
+
Learning
+
+
+
+
+
Calibration
+
+
+
+
+
Vc
+
+
+
+
+
Dimension
+
+
+
+
+
Manifold
+
+
+
+
+
Rfe
+
+
+
+
+
Feature
+
+
+
+
+
Feature_all
+
+
+
+
+
+
+
PyCaret Feature Importance Report
+
+
+
Feature importance analysis from atrained Random Forest
+
Use gini impurity forcalculating feature importance for classificationand Variance Reduction for regression
+
+
+
+
+
SHAP Summary from a trained lightgbm
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1f20fe57fdee test-data/expected_comparison_result_classification_customized.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_comparison_result_classification_customized.html Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,620 @@
+
+
+
+
+ Model Training Report
+
+
+
+
+
+
PyCaret Model Training Report
+
+
+ Setup & Best Model
+
+ Best Model Plots
+
+ Feature Importance
+
+ Explainer
+
+
+
+
Setup Parameters
+
+ Parameter | Value |
+
+
+
+ target |
+ PCR |
+
+
+ session_id |
+ 42 |
+
+
+ index |
+ False |
+
+
+ train_size |
+ 0.8 |
+
+
+ normalize |
+ True |
+
+
+ feature_selection |
+ True |
+
+
+ fold |
+ 5 |
+
+
+ remove_outliers |
+ True |
+
+
+ remove_multicollinearity |
+ True |
+
+
+
+
+
If you want to know all the experiment setup parameters,
+ please check the PyCaret documentation for
+ the classification/regression exp
function.
+
Best Model: LGBMClassifier
+
+ Parameter | Value |
+
+
+
+ boosting_type |
+ gbdt |
+
+
+ class_weight |
+ None |
+
+
+ colsample_bytree |
+ 1.0 |
+
+
+ importance_type |
+ split |
+
+
+ learning_rate |
+ 0.1 |
+
+
+ max_depth |
+ -1 |
+
+
+ min_child_samples |
+ 20 |
+
+
+ min_child_weight |
+ 0.001 |
+
+
+ min_split_gain |
+ 0.0 |
+
+
+ n_estimators |
+ 100 |
+
+
+ n_jobs |
+ -1 |
+
+
+ num_leaves |
+ 31 |
+
+
+ objective |
+ None |
+
+
+ random_state |
+ 42 |
+
+
+ reg_alpha |
+ 0.0 |
+
+
+ reg_lambda |
+ 0.0 |
+
+
+ subsample |
+ 1.0 |
+
+
+ subsample_for_bin |
+ 200000 |
+
+
+ subsample_freq |
+ 0 |
+
+
+
+
+
Comparison Results on the Cross-Validation Set
+
+
+
+
+ Model |
+ Accuracy |
+ ROC-AUC |
+ Recall |
+ Prec. |
+ F1 |
+ Kappa |
+ MCC |
+ PR-AUC-Weighted |
+ TT (Sec) |
+
+
+
+
+ Light Gradient Boosting Machine |
+ 0.7091 |
+ 0.6267 |
+ 0.64 |
+ 0.6895 |
+ 0.6467 |
+ 0.4056 |
+ 0.4224 |
+ 0.5918 |
+ 0.322 |
+
+
+ Naive Bayes |
+ 0.6545 |
+ 0.6800 |
+ 0.72 |
+ 0.6117 |
+ 0.6498 |
+ 0.3163 |
+ 0.3232 |
+ 0.6930 |
+ 1.240 |
+
+
+ K Neighbors Classifier |
+ 0.6364 |
+ 0.6467 |
+ 0.56 |
+ 0.6067 |
+ 0.5743 |
+ 0.2603 |
+ 0.2660 |
+ 0.6001 |
+ 0.864 |
+
+
+ Ridge Classifier |
+ 0.6364 |
+ 0.6467 |
+ 0.64 |
+ 0.5962 |
+ 0.6048 |
+ 0.2700 |
+ 0.2835 |
+ 0.0000 |
+ 0.898 |
+
+
+ Random Forest Classifier |
+ 0.6364 |
+ 0.6300 |
+ 0.60 |
+ 0.6343 |
+ 0.6013 |
+ 0.2688 |
+ 0.2834 |
+ 0.6539 |
+ 0.906 |
+
+
+ Logistic Regression |
+ 0.6364 |
+ 0.6400 |
+ 0.64 |
+ 0.5962 |
+ 0.6048 |
+ 0.2700 |
+ 0.2835 |
+ 0.6697 |
+ 0.798 |
+
+
+ Quadratic Discriminant Analysis |
+ 0.6364 |
+ 0.6933 |
+ 0.72 |
+ 0.5851 |
+ 0.6353 |
+ 0.2815 |
+ 0.2899 |
+ 0.7075 |
+ 0.418 |
+
+
+ Linear Discriminant Analysis |
+ 0.6364 |
+ 0.6467 |
+ 0.64 |
+ 0.5962 |
+ 0.6048 |
+ 0.2700 |
+ 0.2835 |
+ 0.6751 |
+ 0.364 |
+
+
+ Gradient Boosting Classifier |
+ 0.6182 |
+ 0.6333 |
+ 0.60 |
+ 0.5843 |
+ 0.5846 |
+ 0.2328 |
+ 0.2389 |
+ 0.6403 |
+ 0.522 |
+
+
+ Ada Boost Classifier |
+ 0.6182 |
+ 0.6567 |
+ 0.60 |
+ 0.5943 |
+ 0.5891 |
+ 0.2340 |
+ 0.2415 |
+ 0.6517 |
+ 0.560 |
+
+
+ Extra Trees Classifier |
+ 0.6182 |
+ 0.5800 |
+ 0.56 |
+ 0.5876 |
+ 0.5622 |
+ 0.2266 |
+ 0.2347 |
+ 0.6413 |
+ 0.468 |
+
+
+ Decision Tree Classifier |
+ 0.6000 |
+ 0.5967 |
+ 0.56 |
+ 0.5867 |
+ 0.5533 |
+ 0.1950 |
+ 0.2060 |
+ 0.5215 |
+ 1.532 |
+
+
+ CatBoost Classifier |
+ 0.5818 |
+ 0.6667 |
+ 0.48 |
+ 0.5133 |
+ 0.4845 |
+ 0.1454 |
+ 0.1414 |
+ 0.6991 |
+ 3.426 |
+
+
+ SVM - Linear Kernel |
+ 0.5455 |
+ 0.5000 |
+ 0.40 |
+ 0.5033 |
+ 0.4332 |
+ 0.0684 |
+ 0.0685 |
+ 0.0000 |
+ 1.666 |
+
+
+ Dummy Classifier |
+ 0.5455 |
+ 0.5000 |
+ 0.00 |
+ 0.0000 |
+ 0.0000 |
+ 0.0000 |
+ 0.0000 |
+ 0.4545 |
+ 0.456 |
+
+
+ Extreme Gradient Boosting |
+ 0.5273 |
+ 0.5600 |
+ 0.52 |
+ 0.4967 |
+ 0.5042 |
+ 0.0550 |
+ 0.0564 |
+ 0.5943 |
+ 0.336 |
+
+
+
+
+
Results on the Test Set for the best model
+
+
+
+
+ Model |
+ Accuracy |
+ ROC-AUC |
+ Recall |
+ Prec. |
+ F1 |
+ Kappa |
+ MCC |
+ PR-AUC-Weighted |
+
+
+
+
+ Light Gradient Boosting Machine |
+ 0.7857 |
+ 0.7604 |
+ 0.6667 |
+ 0.8 |
+ 0.7273 |
+ 0.5532 |
+ 0.5594 |
+ 0.7502 |
+
+
+
+
+
+
+
Best Model Plots on the testing set
+
+
+
Confusion_matrix
+
+
+
+
+
Auc
+
+
+
+
+
Threshold
+
+
+
+
+
Pr
+
+
+
+
+
Error
+
+
+
+
+
Class_report
+
+
+
+
+
Learning
+
+
+
+
+
Calibration
+
+
+
+
+
Vc
+
+
+
+
+
Dimension
+
+
+
+
+
Manifold
+
+
+
+
+
Rfe
+
+
+
+
+
Feature
+
+
+
+
+
Feature_all
+
+
+
+
+
+
+
PyCaret Feature Importance Report
+
+
+
Feature importance analysis from atrained Random Forest
+
Use gini impurity forcalculating feature importance for classificationand Variance Reduction for regression
+
+
+
+
+
SHAP Summary from a trained lightgbm
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1f20fe57fdee test-data/expected_comparison_result_regression.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_comparison_result_regression.html Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,591 @@
+
+
+
+
+ Model Training Report
+
+
+
+
+
+
PyCaret Model Training Report
+
+
+ Setup & Best Model
+
+ Best Model Plots
+
+ Feature Importance
+
+ Explainer
+
+
+
+
Setup Parameters
+
+ Parameter | Value |
+
+
+
+ target |
+ MPG |
+
+
+ session_id |
+ 42 |
+
+
+ index |
+ False |
+
+
+
+
+
If you want to know all the experiment setup parameters,
+ please check the PyCaret documentation for
+ the classification/regression exp
function.
+
Best Model: GradientBoostingRegressor
+
+ Parameter | Value |
+
+
+
+ alpha |
+ 0.9 |
+
+
+ ccp_alpha |
+ 0.0 |
+
+
+ criterion |
+ friedman_mse |
+
+
+ init |
+ None |
+
+
+ learning_rate |
+ 0.1 |
+
+
+ loss |
+ squared_error |
+
+
+ max_depth |
+ 3 |
+
+
+ max_features |
+ None |
+
+
+ max_leaf_nodes |
+ None |
+
+
+ min_impurity_decrease |
+ 0.0 |
+
+
+ min_samples_leaf |
+ 1 |
+
+
+ min_samples_split |
+ 2 |
+
+
+ min_weight_fraction_leaf |
+ 0.0 |
+
+
+ n_estimators |
+ 100 |
+
+
+ n_iter_no_change |
+ None |
+
+
+ random_state |
+ 42 |
+
+
+ subsample |
+ 1.0 |
+
+
+ tol |
+ 0.0001 |
+
+
+ validation_fraction |
+ 0.1 |
+
+
+ verbose |
+ 0 |
+
+
+ warm_start |
+ False |
+
+
+
+
+
Comparison Results on the Cross-Validation Set
+
+
+
+
+ Model |
+ MAE |
+ MSE |
+ RMSE |
+ R2 |
+ RMSLE |
+ MAPE |
+ TT (Sec) |
+
+
+
+
+ Gradient Boosting Regressor |
+ 2.2775 |
+ 9.8743 |
+ 3.0921 |
+ 0.8383 |
+ 0.1197 |
+ 0.0980 |
+ 0.681 |
+
+
+ Extra Trees Regressor |
+ 2.2119 |
+ 10.2477 |
+ 3.1304 |
+ 0.8323 |
+ 0.1220 |
+ 0.0949 |
+ 2.212 |
+
+
+ Light Gradient Boosting Machine |
+ 2.3218 |
+ 10.4931 |
+ 3.1818 |
+ 0.8282 |
+ 0.1252 |
+ 0.1011 |
+ 0.263 |
+
+
+ CatBoost Regressor |
+ 2.3204 |
+ 10.5063 |
+ 3.1906 |
+ 0.8270 |
+ 0.1256 |
+ 0.1011 |
+ 8.883 |
+
+
+ Random Forest Regressor |
+ 2.3161 |
+ 11.0170 |
+ 3.2515 |
+ 0.8210 |
+ 0.1252 |
+ 0.0990 |
+ 1.916 |
+
+
+ Extreme Gradient Boosting |
+ 2.4277 |
+ 11.9887 |
+ 3.3949 |
+ 0.8045 |
+ 0.1336 |
+ 0.1057 |
+ 0.497 |
+
+
+ Elastic Net |
+ 2.6119 |
+ 12.1337 |
+ 3.4462 |
+ 0.8029 |
+ 0.1426 |
+ 0.1168 |
+ 0.116 |
+
+
+ Lasso Regression |
+ 2.6238 |
+ 12.2869 |
+ 3.4649 |
+ 0.8011 |
+ 0.1438 |
+ 0.1172 |
+ 0.134 |
+
+
+ Lasso Least Angle Regression |
+ 2.6238 |
+ 12.2868 |
+ 3.4649 |
+ 0.8011 |
+ 0.1438 |
+ 0.1172 |
+ 0.157 |
+
+
+ AdaBoost Regressor |
+ 2.5949 |
+ 12.5846 |
+ 3.4968 |
+ 0.7939 |
+ 0.1378 |
+ 0.1153 |
+ 2.469 |
+
+
+ Bayesian Ridge |
+ 2.6494 |
+ 12.5149 |
+ 3.5121 |
+ 0.7920 |
+ 0.1433 |
+ 0.1194 |
+ 0.268 |
+
+
+ Ridge Regression |
+ 2.6852 |
+ 12.7684 |
+ 3.5480 |
+ 0.7872 |
+ 0.1448 |
+ 0.1212 |
+ 0.108 |
+
+
+ Linear Regression |
+ 2.6893 |
+ 12.7997 |
+ 3.5523 |
+ 0.7866 |
+ 0.1450 |
+ 0.1214 |
+ 0.122 |
+
+
+ Least Angle Regression |
+ 2.7583 |
+ 13.3766 |
+ 3.6327 |
+ 0.7759 |
+ 0.1489 |
+ 0.1249 |
+ 0.165 |
+
+
+ Huber Regressor |
+ 2.6780 |
+ 14.2077 |
+ 3.7197 |
+ 0.7699 |
+ 0.1404 |
+ 0.1138 |
+ 1.508 |
+
+
+ Decision Tree Regressor |
+ 2.6552 |
+ 15.5784 |
+ 3.8636 |
+ 0.7507 |
+ 0.1470 |
+ 0.1108 |
+ 0.253 |
+
+
+ Orthogonal Matching Pursuit |
+ 3.3731 |
+ 20.2491 |
+ 4.4464 |
+ 0.6709 |
+ 0.1767 |
+ 0.1475 |
+ 0.418 |
+
+
+ K Neighbors Regressor |
+ 3.4315 |
+ 21.1052 |
+ 4.5405 |
+ 0.6546 |
+ 0.1692 |
+ 0.1448 |
+ 0.858 |
+
+
+ Dummy Regressor |
+ 6.6547 |
+ 62.8366 |
+ 7.8973 |
+ -0.0391 |
+ 0.3303 |
+ 0.3219 |
+ 0.129 |
+
+
+ Passive Aggressive Regressor |
+ 7.5227 |
+ 84.7568 |
+ 9.0993 |
+ -0.4762 |
+ 0.4067 |
+ 0.3652 |
+ 0.420 |
+
+
+
+
+
Results on the Test Set for the best model
+
+
+
+
+ Model |
+ MAE |
+ MSE |
+ RMSE |
+ R2 |
+ RMSLE |
+ MAPE |
+
+
+
+
+ Gradient Boosting Regressor |
+ 2.2015 |
+ 9.911 |
+ 3.1482 |
+ 0.8273 |
+ 0.1198 |
+ 0.094 |
+
+
+
+
+
+
+
Best Model Plots on the testing set
+
+
+
Residuals
+
+
+
+
+
Error
+
+
+
+
+
Cooks
+
+
+
+
+
Learning
+
+
+
+
+
Vc
+
+
+
+
+
Manifold
+
+
+
+
+
Rfe
+
+
+
+
+
Feature
+
+
+
+
+
Feature_all
+
+
+
+
+
+
+
PyCaret Feature Importance Report
+
+
+
Feature importance analysis from atrained Random Forest
+
Use gini impurity forcalculating feature importance for classificationand Variance Reduction for regression
+
+
+
+
+
SHAP Summary from a trained lightgbm
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1f20fe57fdee test-data/expected_model_classification.h5
Binary file test-data/expected_model_classification.h5 has changed
diff -r 000000000000 -r 1f20fe57fdee test-data/expected_model_classification_customized.h5
Binary file test-data/expected_model_classification_customized.h5 has changed
diff -r 000000000000 -r 1f20fe57fdee test-data/expected_model_regression.h5
Binary file test-data/expected_model_regression.h5 has changed
diff -r 000000000000 -r 1f20fe57fdee test-data/pcr.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pcr.tsv Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,70 @@
+SCGB2A2 FDCSP MUCL1 PIP TFF1 SCGB1D1 SCGB1D2 CALML5 AGR2 CPB1 PCR
+0.1446255786628313 0.1699650673189346 0.9375600251501058 0.6976422301857574 0.0957045465168299 0.1228059681653541 0.1203946505834535 0.6617311325227386 0.5265003537481928 0.184448963872354 0
+0.3704854668147701 0.3635509393089497 0.2775010191320449 0.3686096626765244 0.2591694877907093 0.4492672714584788 0.4011669070407762 0.5399668857265811 0.138846165677197 0.0619176638311385 0
+0.0418563439424193 0.0619893695619427 0.1075443408806682 0.5687699992462811 0.533482902986719 0.0156581264745954 0.0292513439097941 0.237518325905719 0.5496170291303947 0.0824645446376087 0
+0.1909378671820324 0.0613509694356973 0.3629699235132763 0.3137868894020268 0.5779331343522767 0.2338351111554182 0.2060115379572571 0.3704859920788323 0.6821187978713588 0.1031398012202455 0
+0.3100284435655261 0.1002646849961624 0.4381507762676965 0.1638859006598466 0.6850690758064408 0.3223872661416277 0.2810213714435557 0.7561870445875668 0.6041327017133716 0.1048289561698074 0
+0.6841865332879608 0.409759631592916 0.8638163213133329 0.7138334829351185 0.1695946261760247 0.5548865184594425 0.5307263668545956 0.6849694750585335 0.3038312467316744 0.1088018486111768 1
+0.7546533801614157 0.1863769716880304 0.4316006937007274 0.4769875910458192 0.5771919633020173 0.7850330641677468 0.748793759014029 0.1828214236220012 0.6132763234796518 0.7975784274643083 0
+0.6353830250341901 0.3786932164606809 0.5679018316739328 0.1040817305386349 0.1566875663548406 0.5730967334596087 0.5204929854464402 0.5372681453818717 0.0586914392937339 0.0612420018513138 1
+0.0166680578117074 0.3562631356205751 0.1643117231277334 0.0396456150688968 0.0395224454638328 0.0423354268434519 0.0142454438180149 0.5395511338356394 0.0002845365898673 0.0720863766275008 1
+0.5102184953803285 0.4378492371477142 0.9462451029841566 0.8621174019994107 0.2530665117552399 0.5187252849499885 0.474170709322145 0.7239699199859956 0.632793995508936 0.285088815767248 0
+0.0802840504150962 0.423273629770965 0.2952995557275221 0.1087890477789274 0.0522291887849468 0.0730801182999368 0.0557362003408942 0.7561870445875668 0.1211972069273124 0.079768653338108 1
+0.3646619196517429 0.4965031453759028 0.5356212559852415 0.5401492363456967 0.3042607319564912 0.4574153457614728 0.4296053494165464 0.7187110232602242 0.2191393152665416 0.1756923846138254 1
+0.2492696488535895 0.1788094196297279 0.3172368048310312 0.4172587928163735 0.2613195515581284 0.2899212441431563 0.2440540186180673 0.2416685509223127 0.3306930388507797 0.0510057228569691 0
+0.1849281206058544 0.404946525023133 0.3782810869820563 0.3862124253989571 0.1854263068982325 0.2116173196424418 0.1936475678510554 0.6157504321631497 0.213740810237165 0.1145247055802923 1
+0.1111803116592936 0.0732366886400642 0.658262569871002 0.7094619132126927 0.2154003325253901 0.1214036486890638 0.0958502687819588 0.1784013245709367 0.6119766833799871 0.8450707080261888 1
+0.0712373272037342 0.6312270911190652 0.4727114439891937 0.4745003186175425 0.1220261346260424 0.0398497989565679 0.0539202832044053 0.1447910664400697 0.1038404749453997 0.1136531016263183 1
+0.4914637200074481 0.3637661303627403 0.8727155896110713 0.1880049060249549 0.5807308881365894 0.3977004619014389 0.3426642192211879 0.1356664065178225 0.7099880033221571 0.2581434160118376 0
+0.1879650972410383 0.6843649353350882 0.3442040751463059 0.3207073994641743 0.1664095938249101 0.2249227395075267 0.1813425986626459 0.6826135476765304 0.0589759758836014 0.0306615406444463 1
+0.0640140741073664 0.7704054916756926 0.2979182068803504 0.3257436122321728 0.1189880010416458 0.1843019971421925 0.1558607578340107 0.6701045214841611 0.3286013104063491 0.0820591474497138 0
+0.0302670356411359 0.7082081040950856 0.274744180583289 0.0606127049601557 0.0570301075699605 0.0660818130462233 0.0421200996459945 0.5280705465313893 0.1142837368113445 0.0651473280947008 1
+0.0790063372007165 0.0583813328933871 0.0643607796532877 0.4587818531892589 0.2408939457676461 0.2613365234439903 0.2289891176084961 0.148802707493016 0.4699698545018302 0.2556502233062843 0
+0.2611671492869847 0.99817087604278 0.0332478874601847 0.1521278855443564 0.0752789407263476 0.2566909247997874 0.215425462173856 0.2394512075039568 0.0693500261466055 0.0944507881596995 0
+0.0521807803681611 0.0619176392106792 0.2881967235768424 0.5927588167983391 0.7092673090149102 0.3005549463330343 0.267274157598007 0.1770154849344644 0.836160755483097 0.2165158814348358 0
+0.1515406396270875 0.3608610511365673 0.1023001292052151 0.0631684972900379 0.0374658627297797 0.1163127637656598 0.0826012849088763 0.506910963450303 0.0079208834476606 0.053025952176645 1
+0.3485139360629739 0.1691114761388986 0.24853003157582 0.4706700561177993 0.3928340110708253 0.4240587512045991 0.3806411433066737 0.268772656654583 0.3584853425205327 0.5235501983067912 1
+0.5568967620564121 0.1623186118742424 0.1611403223911947 0.1395613355899221 0.536728030287855 0.4711726979696276 0.4458633800970237 0.1440981466218335 0.577986096157987 0.1824963007506604 0
+0.0848940910579336 0.2717073975511259 0.1638280672419868 0.3765374152922717 0.5923158590573105 0.1014122885720931 0.0962894978366329 0.2185031473147533 0.5506936540650282 0.8636581690911671 1
+0.1809858295825922 0.4611257361327298 0.4665759236100074 0.2883865618768971 0.1458103804010336 0.1610008972186223 0.1314409335256326 0.4802955485372097 0.3677058660678581 0.1415376715336851 1
+0.6925590862103285 0.1141373349305291 0.2721946231284243 0.6383793672872287 0.7640939350840996 0.6058817665237763 0.5770551986364232 0.2436743714487859 0.7375034605801472 0.3529185219218529 0
+0.4575304821280667 0.1861617806342397 0.0947136411687889 0.2999458692777317 0.4435007311552253 0.5126175522546773 0.4606726104628294 0.1177526057432111 0.8363760804700238 0.0629244001810774 0
+0.4245089793061825 0.0815861015271391 0.791254119711741 0.2688995018603153 0.551464647476346 0.5569268600671253 0.5007211223285695 0.0851853742861102 0.4569350026146606 0.1206597163571009 0
+0.2982529358510918 0.795525460688181 0.1387194174019387 0.1399656030093941 0.0630462797887328 0.3092745821287344 0.2670250426117739 0.6415416371871832 0.1101079701005875 0.0785524617744234 1
+0.233609636140664 0.4062161522404975 0.9879086028563334 0.9661991325380456 0.1685663348089982 0.1614461834978234 0.1261046282942179 0.6561367167270843 0.3333538404749454 0.2720890792754201 0
+0.681817306272352 0.0667953030965992 0.8540257443118612 0.8451861343127112 0.4818479864853135 0.5086033296779984 0.4599842664219221 0.6089087606946703 0.4956012181242119 0.0572691094099444 0
+0.0810095860594425 0.3372330734303606 0.1099557109396051 0.2463907141829344 0.0774022956270907 0.11465789386236 0.0969188409597482 0.6832408224593548 0.0955581531268264 0.1186327304176267 1
+0.034209326664398 0.0837810502758032 0.2509483110045533 0.3819162275682972 0.6583067913970739 0.1820755657461868 0.1503998951094795 0.0533110626472453 0.7082654034267435 0.0910184253021898 0
+0.4162519984333568 0.2750356858497535 0.6070848677892088 0.554524711702514 0.0419395978980121 0.2435117801482073 0.2163694768585289 0.796989081042443 0.0321910855455412 0.0636676283588846 1
+0.4053304397516485 0.9309523638737258 0.2169403928667665 0.083190012539142 0.1264464520609228 0.292719237031868 0.2567326602858267 0.180611374096469 0.2547832907810144 0.0648838199225691 1
+0.4072116958914137 0.147032874019984 0.2906080936357794 0.2531810364320317 0.2807101887649152 0.4767753298109195 0.4525173724924609 0.455189969438589 0.5452182472546064 0.2009553860394722 0
+0.7831868350594233 0.4952335181585384 0.8876881939598288 0.8637344716772986 0.7519480779631817 0.8452530488818 0.815268126393077 0.4242128066170194 0.8149127933802947 0.2147591602872915 0
+0.1676886232158564 0.191613287330268 0.3303438793347657 0.2457877390488068 0.6168679847492371 0.1524739972751138 0.1259145142257768 0.1578617223798513 0.7106416684610415 0.0419113126085282 0
+0.1353990767077375 0.4476834683059442 0.0851234358914123 0.1091933151983994 0.0497853274840914 0.1853853055527863 0.1404156286875574 0.6329713131195248 0.0379510289458304 0.0741066059471767 1
+0.1346093343692013 0.8714448644655013 0.3202700181716424 0.2830077496008716 0.1089654988214712 0.1790183763665969 0.1542283991084305 0.1640177679229181 0.1901857947030053 0.0553840124862333 1
+0.4774987640211369 0.8638055820559354 0.0964340742480877 0.534366156650199 0.0 0.2400691190642343 0.2139242165989248 0.6193463213251543 0.050839767449014 0.0921602940480936 0
+0.0203727840664666 0.994211360653033 0.3367557745058073 0.0470389124521217 0.0420330789313782 0.0131126840128933 0.0 0.2207131968402855 0.1121996985450182 0.033627696735877 1
+0.297103636025092 0.5374755220176315 0.708134401061279 0.6246068670645389 0.1705361137263543 0.2226298474728342 0.1999213321096106 0.1626319282864456 0.1491433141591559 0.1546049742234955 1
+0.1330298496921289 0.130269490929697 0.3401344563362375 0.3424076523026113 0.7599340290993104 0.1119795301232844 0.1110397272846466 0.3139947921605239 0.4837198929527208 0.9153260406883644 0
+0.0762775526976442 0.4896385507599831 0.1103011794294241 0.0916522203874115 0.0825571068955616 0.0604127205662446 0.0665989248721646 0.619827718251508 0.0961349164846657 0.1151328013621346 0
+0.0705117915593879 0.3750134494408619 0.0521519232230828 0.0816414627628595 0.1074364162042694 0.0879805934935035 0.0779598793759014 0.3037614605290989 0.083392291365468 0.0954642811294366 1
+0.1357586341952011 0.8754761102065116 0.1697563065272815 0.135731073090179 0.1530885465702476 0.1466786295816303 0.1258555133079848 0.562096556553198 0.1268187271217201 0.1500915521982663 0
+0.2510289122743937 0.3406259190451255 0.581907124251197 0.7472232309874403 0.6858369557233763 0.1766656697570863 0.127546872951357 0.1621505313600921 0.6288335537851057 0.2456842091038695 1
+0.1956699005438307 0.6519571626342255 0.389108069452985 0.1192040728229514 0.0828842905123428 0.2295018775130429 0.1958437131244264 0.5566990758637792 0.0865606447445322 0.1591859624467071 0
+0.3356469145473107 0.3457905043360997 0.2730859318321574 0.6796352000438528 0.4441484211721186 0.2817731698401622 0.2434836764127442 0.5422498741803488 0.5881448214340645 0.6631892596771687 1
+0.1143970670382094 0.5981809182919569 0.180521104670043 0.2306037288530453 0.0763072320933741 0.193726115707972 0.1536580569031074 0.8881919169079727 0.450452182472546 0.078484895576441 0
+0.5797222418409345 0.2175079441364024 0.2170785802626942 0.4617350609484524 0.3569039081749164 0.480271159405842 0.4535203880949259 0.2057096593022661 0.6266726137377342 1.0000000000000002 0
+0.133877378055436 0.8522713415727596 0.1536851123808997 0.2878452546542144 0.0997108765182321 0.1765327484797129 0.1496459944932476 0.431338939905617 0.2374342489772063 0.138774214036202 0
+0.7981790981527734 0.2165897956402292 0.7117894576835645 0.6045784998252742 0.6593417599807698 0.8219519489582297 0.7853284384423758 0.5817317160341645 0.4874650096896245 0.8523475875489013 0
+0.2857518924923112 0.924087769257806 0.2979873005783142 0.2512282192362771 0.2142184651749765 0.3015717941049414 0.2615051789694506 0.6093245125856119 0.1986834415085053 0.1229502104687067 1
+0.0858058261154308 0.8986163215241265 0.4177335885193911 0.2842205518592875 0.1110287587721933 0.2480909181537235 0.2154910187491805 0.5918994026301777 0.0901596480974499 0.0779511226123795 1
+0.4424162263157557 0.4638156243051122 0.9354181205132276 0.6676031053219408 0.4695285217310017 0.484285381982521 0.4451094794807919 0.540032530761993 0.6746285643975516 0.1133828368343885 0
+0.1224678484978843 0.6581116267726363 0.6069466803932813 0.0373639023454362 0.0411183002477246 0.1476356627787193 0.1003671168218172 0.7746478873239436 0.013611615245009 0.0415734816186158 0
+0.8225776419449493 0.6945506452145096 0.9266570396114168 0.366999444988797 0.5741872158009655 0.5425381317914466 0.488796381277042 0.5683838921670884 0.7889199913870006 0.0824645446376087 1
+0.1426223298041053 0.8913285178357517 0.0756023243119994 0.1456733108131257 0.0894279628479664 0.1422191207257502 0.1296184607316113 0.6501192551476648 0.1399920022147712 0.136152645554482 1
+0.7436098287607467 0.0508783381512219 0.9367999944725042 0.8668932391413087 0.6116530785307452 0.6847572525171968 0.6387570473318473 0.0916842327918833 0.5387431172905965 0.5302865482456436 0
+0.5959857974792453 0.3464289044623451 0.422286863215206 0.1385540930363224 0.4875369750873046 0.4499052935898715 0.4171758227350202 0.0580812685538398 0.5668276477283215 0.0697958825158949 0
+0.5064559831007982 0.1684730760126532 0.8784434571722713 0.6313218174218702 0.1444549054172259 0.304369786993653 0.2628228661334732 0.7438093084660212 0.6597249992309823 0.4005459348796984 1
+0.6454570553525911 0.1390421128892268 0.3442040751463059 0.3152600672865434 0.3017567757056149 0.6874954308310904 0.6415169791530091 0.1059292054762546 0.5925359131317481 0.203847219313122 0
+0.0556351005155797 0.9816872413224208 0.0713945181060035 0.0874862103698019 0.0640745711557594 0.0695842887050144 0.030064245443818 0.4076192004434686 0.1690147343812482 0.0523502901968203 1
+0.3732206719872614 0.2336401001355704 0.362348080231602 0.2704480516365979 0.6884010069242736 0.3512311833316719 0.3141667759276256 0.0 0.7696176443446432 0.3280676743038993 0
diff -r 000000000000 -r 1f20fe57fdee test-data/predictions_classification.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predictions_classification.csv Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,70 @@
+SCGB2A2,FDCSP,MUCL1,PIP,TFF1,SCGB1D1,SCGB1D2,CALML5,AGR2,CPB1,PCR,prediction_label,prediction_score
+0.14462557,0.16996507,0.93756,0.6976422,0.09570455,0.12280597,0.12039465,0.6617311,0.52650034,0.18444896,0,0,0.7168
+0.37048545,0.36355093,0.27750102,0.36860967,0.2591695,0.44926727,0.40116692,0.5399669,0.13884616,0.061917663,0,1,0.8633
+0.041856345,0.06198937,0.10754434,0.56877,0.5334829,0.015658127,0.029251345,0.23751833,0.54961705,0.082464546,0,0,0.6666
+0.19093786,0.061350968,0.36296993,0.3137869,0.57793313,0.23383512,0.20601153,0.370486,0.6821188,0.1031398,0,0,0.859
+0.31002843,0.10026468,0.43815076,0.1638859,0.6850691,0.32238728,0.28102136,0.756187,0.6041327,0.104828954,0,0,0.8391
+0.6841865,0.40975964,0.8638163,0.7138335,0.16959463,0.5548865,0.5307264,0.6849695,0.30383125,0.10880185,1,1,0.8534
+0.7546534,0.18637697,0.4316007,0.4769876,0.57719195,0.78503305,0.7487938,0.18282142,0.6132763,0.79757845,0,0,0.7254
+0.635383,0.37869322,0.56790185,0.10408173,0.15668757,0.57309675,0.520493,0.53726816,0.05869144,0.061242003,1,1,0.7164
+0.016668057,0.35626313,0.16431172,0.039645616,0.039522447,0.04233543,0.014245444,0.53955114,0.0002845366,0.07208638,1,1,0.7462
+0.5102185,0.43784922,0.9462451,0.8621174,0.2530665,0.5187253,0.4741707,0.72396994,0.632794,0.2850888,0,0,0.7465
+0.08028405,0.42327362,0.29529956,0.10878905,0.05222919,0.073080115,0.0557362,0.756187,0.12119721,0.07976865,1,1,0.6899
+0.36466193,0.49650314,0.5356213,0.5401492,0.30426073,0.45741534,0.42960533,0.718711,0.21913932,0.17569238,1,1,0.6959
+0.24926965,0.17880942,0.3172368,0.4172588,0.26131955,0.28992125,0.24405402,0.24166855,0.33069304,0.05100572,0,0,0.5877
+0.18492812,0.40494654,0.3782811,0.38621244,0.18542631,0.21161732,0.19364756,0.61575043,0.21374081,0.11452471,1,1,0.865
+0.11118031,0.07323669,0.65826255,0.7094619,0.21540034,0.12140365,0.09585027,0.17840132,0.6119767,0.8450707,1,0,0.7168
+0.071237326,0.6312271,0.47271144,0.47450033,0.12202614,0.0398498,0.053920284,0.14479107,0.10384048,0.1136531,1,1,0.8216
+0.49146372,0.36376613,0.8727156,0.18800491,0.5807309,0.39770046,0.3426642,0.1356664,0.709988,0.25814342,0,0,0.9118
+0.1879651,0.6843649,0.34420407,0.3207074,0.1664096,0.22492275,0.1813426,0.68261355,0.058975976,0.03066154,1,1,0.8174
+0.06401408,0.7704055,0.2979182,0.32574362,0.118988,0.184302,0.15586075,0.6701045,0.3286013,0.082059145,0,0,0.6945
+0.030267036,0.7082081,0.27474418,0.060612705,0.057030108,0.066081814,0.0421201,0.52807057,0.11428374,0.065147325,1,1,0.6086
+0.07900634,0.058381334,0.06436078,0.45878184,0.24089395,0.26133654,0.22898912,0.14880271,0.46996987,0.25565022,0,0,0.7225
+0.26116714,0.99817085,0.033247888,0.15212789,0.07527894,0.25669092,0.21542546,0.23945121,0.06935003,0.09445079,0,1,0.6018
+0.052180782,0.06191764,0.2881967,0.59275883,0.7092673,0.30055496,0.26727417,0.17701548,0.8361608,0.21651588,0,0,0.7553
+0.15154064,0.36086106,0.10230013,0.063168496,0.037465863,0.116312765,0.08260129,0.506911,0.007920884,0.053025953,1,1,0.7462
+0.34851393,0.16911148,0.24853003,0.47067004,0.392834,0.42405877,0.38064113,0.26877266,0.35848534,0.5235502,1,0,0.6467
+0.55689675,0.16231862,0.16114032,0.13956134,0.536728,0.4711727,0.44586337,0.14409815,0.5779861,0.1824963,0,0,0.8867
+0.08489409,0.2717074,0.16382806,0.3765374,0.59231585,0.10141229,0.0962895,0.21850315,0.55069363,0.8636582,1,0,0.75
+0.18098582,0.46112573,0.46657592,0.28838655,0.14581038,0.16100089,0.13144094,0.48029554,0.36770585,0.14153767,1,0,0.9076
+0.69255906,0.11413734,0.27219462,0.6383794,0.76409394,0.60588175,0.5770552,0.24367437,0.73750347,0.35291854,0,0,0.7254
+0.45753047,0.18616179,0.09471364,0.29994586,0.44350073,0.5126175,0.46067262,0.117752604,0.8363761,0.0629244,0,0,0.8391
+0.424509,0.0815861,0.7912541,0.2688995,0.5514647,0.55692685,0.5007211,0.08518537,0.456935,0.12065972,0,0,0.8391
+0.29825294,0.79552543,0.13871942,0.13996561,0.06304628,0.30927458,0.26702505,0.64154166,0.11010797,0.07855246,1,1,0.6384
+0.23360963,0.40621614,0.9879086,0.96619916,0.16856633,0.16144618,0.12610462,0.6561367,0.33335385,0.2720891,0,0,0.6985
+0.6818173,0.066795304,0.8540257,0.8451861,0.48184797,0.50860333,0.45998427,0.6089088,0.4956012,0.057269108,0,0,0.6379
+0.08100959,0.33723307,0.10995571,0.24639072,0.07740229,0.114657894,0.09691884,0.68324083,0.09555815,0.118632734,1,1,0.7462
+0.034209326,0.08378105,0.2509483,0.38191622,0.6583068,0.18207556,0.1503999,0.05331106,0.7082654,0.09101842,0,0,0.6729
+0.416252,0.27503568,0.6070849,0.5545247,0.041939598,0.24351178,0.21636948,0.7969891,0.032191087,0.063667625,1,1,0.8943
+0.40533045,0.93095237,0.21694039,0.08319001,0.12644646,0.29271924,0.25673267,0.18061137,0.2547833,0.06488382,1,1,0.5404
+0.4072117,0.14703287,0.2906081,0.25318104,0.2807102,0.47677532,0.45251736,0.45518997,0.5452182,0.20095539,0,0,0.8867
+0.78318685,0.4952335,0.8876882,0.8637345,0.75194806,0.84525305,0.8152681,0.4242128,0.8149128,0.21475916,0,0,0.8332
+0.16768862,0.19161329,0.33034387,0.24578774,0.61686796,0.152474,0.12591451,0.15786172,0.7106417,0.04191131,0,0,0.8555
+0.13539907,0.44768345,0.085123435,0.10919332,0.049785327,0.1853853,0.14041562,0.6329713,0.03795103,0.074106604,1,1,0.6838
+0.13460934,0.8714449,0.32027003,0.28300774,0.1089655,0.17901838,0.1542284,0.16401777,0.1901858,0.055384014,1,1,0.6086
+0.47749877,0.8638056,0.09643407,0.53436613,0.0,0.24006912,0.21392421,0.6193463,0.050839767,0.09216029,0,1,0.8174
+0.020372784,0.9942114,0.33675578,0.047038913,0.04203308,0.013112684,0.0,0.2207132,0.1121997,0.033627696,1,1,0.6086
+0.29710364,0.5374755,0.7081344,0.62460685,0.17053612,0.22262985,0.19992132,0.16263193,0.14914331,0.15460497,1,1,0.749
+0.13302985,0.1302695,0.34013444,0.34240764,0.759934,0.11197953,0.11103973,0.3139948,0.4837199,0.91532606,0,0,0.75
+0.076277554,0.48963854,0.11030118,0.09165222,0.082557105,0.06041272,0.06659892,0.61982775,0.096134916,0.1151328,0,1,0.6086
+0.07051179,0.37501344,0.052151922,0.081641465,0.10743642,0.08798059,0.07795988,0.30376145,0.08339229,0.09546428,1,1,0.6899
+0.13575864,0.8754761,0.16975631,0.13573107,0.15308854,0.14667863,0.12585552,0.56209654,0.12681873,0.15009156,0,1,0.509
+0.25102893,0.3406259,0.58190715,0.74722326,0.685837,0.17666566,0.12754688,0.16215053,0.62883353,0.2456842,1,0,0.75
+0.1956699,0.65195715,0.38910806,0.119204074,0.08288429,0.22950187,0.19584371,0.5566991,0.086560644,0.15918596,0,1,0.5019
+0.33564693,0.3457905,0.27308592,0.6796352,0.44414842,0.28177318,0.24348368,0.54224986,0.58814484,0.66318923,1,0,0.7553
+0.114397064,0.5981809,0.1805211,0.23060372,0.07630723,0.19372612,0.15365806,0.88819194,0.45045218,0.07848489,0,0,0.9067
+0.5797222,0.21750794,0.21707858,0.46173507,0.3569039,0.48027116,0.4535204,0.20570967,0.6266726,1.0,0,0,0.7254
+0.13387738,0.8522713,0.15368511,0.28784525,0.099710874,0.17653275,0.149646,0.43133894,0.23743425,0.13877422,0,0,0.5535
+0.7981791,0.2165898,0.7117894,0.6045785,0.65934175,0.8219519,0.78532845,0.58173174,0.48746502,0.8523476,0,0,0.7254
+0.28575188,0.92408776,0.2979873,0.2512282,0.21421847,0.3015718,0.2615052,0.6093245,0.19868344,0.12295021,1,1,0.6384
+0.085805826,0.8986163,0.41773358,0.28422055,0.11102876,0.24809092,0.21549101,0.5918994,0.09015965,0.077951126,1,1,0.6018
+0.44241622,0.46381563,0.9354181,0.66760314,0.46952853,0.48428538,0.4451095,0.5400325,0.67462856,0.11338284,0,0,0.769
+0.122467846,0.65811163,0.6069467,0.0373639,0.0411183,0.14763567,0.100367114,0.7746479,0.013611616,0.04157348,0,1,0.6086
+0.82257766,0.69455063,0.926657,0.36699945,0.5741872,0.5425381,0.48879638,0.5683839,0.78892,0.082464546,1,0,0.769
+0.14262234,0.8913285,0.07560232,0.1456733,0.08942796,0.14221913,0.12961847,0.65011925,0.139992,0.13615264,1,1,0.6086
+0.74360985,0.05087834,0.9368,0.86689323,0.6116531,0.68475723,0.63875705,0.09168423,0.53874314,0.53028655,0,0,0.7254
+0.5959858,0.3464289,0.42228687,0.1385541,0.48753697,0.4499053,0.41717583,0.05808127,0.56682765,0.069795884,0,0,0.8391
+0.50645596,0.16847308,0.8784435,0.6313218,0.14445491,0.30436978,0.26282287,0.7438093,0.659725,0.40054592,1,0,0.6903
+0.645457,0.13904211,0.34420407,0.31526005,0.30175677,0.6874954,0.641517,0.1059292,0.5925359,0.20384721,0,0,0.8867
+0.055635102,0.98168725,0.07139452,0.08748621,0.06407457,0.06958429,0.030064246,0.4076192,0.16901474,0.05235029,1,1,0.6086
+0.37322068,0.2336401,0.36234808,0.27044806,0.688401,0.3512312,0.31416678,0.0,0.7696176,0.32806766,0,0,0.8867
diff -r 000000000000 -r 1f20fe57fdee test-data/predictions_regression.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predictions_regression.csv Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,399 @@
+Cylinders,Displacement,Horsepower,Weight,Acceleration,ModelYear,Origin,MPG,prediction_label
+8,307.0,130.0,3504.0,12.0,70,1,18.0,15.496568242719972
+8,350.0,165.0,3693.0,11.5,70,1,15.0,15.295167199600813
+8,318.0,150.0,3436.0,11.0,70,1,18.0,15.751078797169987
+8,304.0,150.0,3433.0,12.0,70,1,16.0,15.751078797169987
+8,302.0,140.0,3449.0,10.5,70,1,17.0,15.496568242719972
+8,429.0,198.0,4341.0,10.0,70,1,15.0,13.47524684916836
+8,454.0,220.0,4354.0,9.0,70,1,14.0,13.584079309576778
+8,440.0,215.0,4312.0,8.5,70,1,14.0,13.192384042871243
+8,455.0,225.0,4425.0,10.0,70,1,14.0,12.69959324150899
+8,390.0,190.0,3850.0,8.5,70,1,15.0,13.526017793240083
+8,383.0,170.0,3563.0,10.0,70,1,15.0,15.382442402453687
+8,340.0,160.0,3609.0,8.0,70,1,14.0,14.948584364357012
+8,400.0,150.0,3761.0,9.5,70,1,15.0,13.984349684908732
+8,455.0,225.0,3086.0,10.0,70,1,14.0,15.718811053746911
+4,113.0,95.00,2372.0,15.0,70,3,24.0,24.04107163431282
+6,198.0,95.00,2833.0,15.5,70,1,22.0,18.69046864753495
+6,199.0,97.00,2774.0,15.5,70,1,18.0,18.57427885892153
+6,200.0,85.00,2587.0,16.0,70,1,21.0,20.821149210852692
+4,97.0,88.00,2130.0,14.5,70,3,27.0,28.260061496908907
+4,97.0,46.00,1835.0,20.5,70,2,26.0,26.795319724347664
+4,110.0,87.00,2672.0,17.5,70,2,25.0,22.758221490905793
+4,107.0,90.00,2430.0,14.5,70,2,24.0,26.08420833813644
+4,104.0,95.00,2375.0,17.5,70,2,25.0,23.298678581918434
+4,121.0,113.0,2234.0,12.5,70,2,26.0,25.61527601855662
+6,199.0,90.00,2648.0,15.0,70,1,21.0,21.62386372514712
+8,360.0,215.0,4615.0,14.0,70,1,10.0,12.499203604751683
+8,307.0,200.0,4376.0,15.0,70,1,10.0,12.127714793001413
+8,318.0,210.0,4382.0,13.5,70,1,11.0,12.328052408601287
+8,304.0,193.0,4732.0,18.5,70,1,9.0,10.069501289521417
+4,97.0,88.00,2130.0,14.5,71,3,27.0,28.260061496908907
+4,140.0,90.00,2264.0,15.5,71,1,28.0,24.1187999092876
+4,113.0,95.00,2228.0,14.0,71,3,25.0,24.08032505703814
+4,98.0,?,2046.0,19.0,71,1,25.0,26.726784109653302
+6,232.0,100.0,2634.0,13.0,71,1,19.0,20.36926456258466
+6,225.0,105.0,3439.0,15.5,71,1,16.0,17.213306929165558
+6,250.0,100.0,3329.0,15.5,71,1,17.0,16.7726121207363
+6,250.0,88.00,3302.0,15.5,71,1,19.0,18.032001571463223
+6,232.0,100.0,3288.0,15.5,71,1,18.0,17.508638427058603
+8,350.0,165.0,4209.0,12.0,71,1,14.0,13.872600628483884
+8,400.0,175.0,4464.0,11.5,71,1,14.0,12.763313270455674
+8,351.0,153.0,4154.0,13.5,71,1,14.0,13.800661505379043
+8,318.0,150.0,4096.0,13.0,71,1,14.0,13.986726468750168
+8,383.0,180.0,4955.0,11.5,71,1,12.0,12.47748465212163
+8,400.0,170.0,4746.0,12.0,71,1,13.0,12.598632864333123
+8,400.0,175.0,5140.0,12.0,71,1,13.0,12.47748465212163
+6,258.0,110.0,2962.0,13.5,71,1,18.0,18.6134791783886
+4,140.0,72.00,2408.0,19.0,71,1,22.0,20.507637823525453
+6,250.0,100.0,3282.0,15.0,71,1,19.0,17.2851327993371
+6,250.0,88.00,3139.0,14.5,71,1,18.0,18.3862192067906
+4,122.0,86.00,2220.0,14.0,71,1,23.0,24.55551066818654
+4,116.0,90.00,2123.0,14.0,71,2,28.0,27.310511747713683
+4,79.0,70.00,2074.0,19.5,71,2,30.0,30.428927901521124
+4,88.0,76.00,2065.0,14.5,71,2,30.0,28.57037976485042
+4,71.0,65.00,1773.0,19.0,71,3,31.0,36.65148739336434
+4,72.0,69.00,1613.0,18.0,71,3,35.0,34.59693136902584
+4,97.0,60.00,1834.0,19.0,71,2,27.0,29.361661670375785
+4,91.0,70.00,1955.0,20.5,71,1,26.0,30.16300924555696
+4,113.0,95.00,2278.0,15.5,72,3,24.0,22.77137953569319
+4,97.5,80.00,2126.0,17.0,72,1,25.0,26.98790612863828
+4,97.0,54.00,2254.0,23.5,72,2,23.0,22.765419622541582
+4,140.0,90.00,2408.0,19.5,72,1,20.0,23.424826127292256
+4,122.0,86.00,2226.0,16.5,72,1,21.0,23.87254823275849
+8,350.0,165.0,4274.0,12.0,72,1,13.0,13.339654598544426
+8,400.0,175.0,4385.0,12.0,72,1,14.0,12.39284979473466
+8,318.0,150.0,4135.0,13.5,72,1,15.0,13.601349225808425
+8,351.0,153.0,4129.0,13.0,72,1,14.0,13.673288348913266
+8,304.0,150.0,3672.0,11.5,72,1,17.0,15.20998076029648
+8,429.0,208.0,4633.0,11.0,72,1,11.0,12.500280961938373
+8,350.0,155.0,4502.0,13.5,72,1,13.0,12.492061867780215
+8,350.0,160.0,4456.0,13.5,72,1,12.0,12.492061867780215
+8,400.0,190.0,4422.0,12.5,72,1,13.0,12.564000990885056
+3,70.0,97.00,2330.0,13.5,72,3,19.0,19.15318495241919
+8,304.0,150.0,3892.0,12.5,72,1,15.0,13.78741418917955
+8,307.0,130.0,4098.0,14.0,72,1,13.0,13.601349225808425
+8,302.0,140.0,4294.0,16.0,72,1,13.0,13.243870651869415
+8,318.0,150.0,4077.0,14.0,72,1,14.0,13.601349225808425
+4,121.0,112.0,2933.0,14.5,72,2,18.0,20.434916485822264
+4,121.0,76.00,2511.0,18.0,72,2,22.0,25.47968061154449
+4,120.0,87.00,2979.0,19.5,72,2,21.0,19.631543487831994
+4,96.0,69.00,2189.0,18.0,72,2,26.0,28.710715370343323
+4,122.0,86.00,2395.0,16.0,72,1,22.0,23.87254823275849
+4,97.0,92.00,2288.0,17.0,72,3,28.0,26.892499698588658
+4,120.0,97.00,2506.0,14.5,72,3,23.0,23.013268827190142
+4,98.0,80.00,2164.0,15.0,72,1,28.0,27.357673247646165
+4,97.0,88.00,2100.0,16.5,72,3,27.0,27.44707328444399
+8,350.0,175.0,4100.0,13.0,73,1,13.0,13.673288348913266
+8,304.0,150.0,3672.0,11.5,73,1,14.0,15.20998076029648
+8,350.0,145.0,3988.0,13.0,73,1,13.0,13.673288348913266
+8,302.0,137.0,4042.0,14.5,73,1,14.0,13.599145656672007
+8,318.0,150.0,3777.0,12.5,73,1,15.0,13.78741418917955
+8,429.0,198.0,4952.0,11.5,73,1,12.0,12.335600555815821
+8,400.0,150.0,4464.0,12.0,73,1,13.0,12.564000990885056
+8,351.0,158.0,4363.0,13.0,73,1,13.0,13.027831074966864
+8,318.0,150.0,4237.0,14.5,73,1,14.0,13.599145656672007
+8,440.0,215.0,4735.0,11.0,73,1,13.0,12.429305293125752
+8,455.0,225.0,4951.0,11.0,73,1,12.0,12.335600555815821
+8,360.0,175.0,3821.0,11.0,73,1,13.0,13.673288348913266
+6,225.0,105.0,3121.0,16.5,73,1,18.0,17.607694672556782
+6,250.0,100.0,3278.0,18.0,73,1,16.0,15.951491169217068
+6,232.0,100.0,2945.0,16.0,73,1,18.0,18.54692679316855
+6,250.0,88.00,3021.0,16.5,73,1,18.0,18.076287073752614
+6,198.0,95.00,2904.0,16.0,73,1,23.0,18.650940367034455
+4,97.0,46.00,1950.0,21.0,73,2,26.0,26.314841052453858
+8,400.0,150.0,4997.0,14.0,73,1,11.0,12.234575373968148
+8,400.0,167.0,4906.0,12.5,73,1,12.0,12.399320584762505
+8,360.0,170.0,4654.0,13.0,73,1,13.0,12.564000990885056
+8,350.0,180.0,4499.0,12.5,73,1,12.0,12.564000990885056
+6,232.0,100.0,2789.0,15.0,73,1,18.0,18.590599688679614
+4,97.0,88.00,2279.0,19.0,73,3,20.0,26.09947534200486
+4,140.0,72.00,2401.0,19.5,73,1,21.0,21.09709883016186
+4,108.0,94.00,2379.0,16.5,73,3,22.0,23.287675679314187
+3,70.0,90.00,2124.0,13.5,73,3,18.0,19.49064602619855
+4,122.0,85.00,2310.0,18.5,73,1,19.0,22.272129580076363
+6,155.0,107.0,2472.0,14.0,73,1,21.0,22.085341610449962
+4,98.0,90.00,2265.0,15.5,73,2,26.0,24.814516239516806
+8,350.0,145.0,4082.0,13.0,73,1,15.0,13.673288348913266
+8,400.0,230.0,4278.0,9.5,73,1,16.0,13.589988180967946
+4,68.0,49.00,1867.0,19.5,73,2,29.0,28.563071420166143
+4,116.0,75.00,2158.0,15.5,73,2,24.0,26.819477086588783
+4,114.0,91.00,2582.0,14.0,73,2,20.0,22.032419732018813
+4,121.0,112.0,2868.0,15.5,73,2,19.0,19.9299381097487
+8,318.0,150.0,3399.0,11.0,73,1,15.0,15.963357136519747
+4,121.0,110.0,2660.0,14.0,73,2,24.0,22.001817227331795
+6,156.0,122.0,2807.0,13.5,73,3,20.0,20.258716085253806
+8,350.0,180.0,3664.0,11.0,73,1,11.0,15.095854920030195
+6,198.0,95.00,3102.0,16.5,74,1,20.0,18.393781104066345
+6,200.0,?,2875.0,17.0,74,1,21.0,19.41536514160801
+6,232.0,100.0,2901.0,16.0,74,1,19.0,19.025464889216185
+6,250.0,100.0,3336.0,17.0,74,1,15.0,17.22453762482498
+4,79.0,67.00,1950.0,19.0,74,3,31.0,36.677905122341514
+4,122.0,80.00,2451.0,16.5,74,1,26.0,25.733787073782732
+4,71.0,65.00,1836.0,21.0,74,3,32.0,36.938420175716814
+4,140.0,75.00,2542.0,17.0,74,1,25.0,26.407738463061392
+6,250.0,100.0,3781.0,17.0,74,1,16.0,15.921043568152513
+6,258.0,110.0,3632.0,18.0,74,1,16.0,16.159431588712753
+6,225.0,105.0,3613.0,16.5,74,1,18.0,17.573151331659965
+8,302.0,140.0,4141.0,14.0,74,1,16.0,14.837195418720574
+8,350.0,150.0,4699.0,14.5,74,1,13.0,14.27843701750991
+8,318.0,150.0,4457.0,13.5,74,1,14.0,14.130950543759527
+8,302.0,140.0,4638.0,16.0,74,1,14.0,13.842210524738244
+8,304.0,150.0,4257.0,15.5,74,1,14.0,14.517474941013743
+4,98.0,83.00,2219.0,16.5,74,2,29.0,26.61098468095076
+4,79.0,67.00,1963.0,15.5,74,2,26.0,33.301811228871216
+4,97.0,78.00,2300.0,14.5,74,2,26.0,26.94927516532924
+4,76.0,52.00,1649.0,16.5,74,3,31.0,31.511705040560646
+4,83.0,61.00,2003.0,19.0,74,3,32.0,30.41356415688067
+4,90.0,75.00,2125.0,14.5,74,1,28.0,28.270073143116015
+4,90.0,75.00,2108.0,15.5,74,2,24.0,26.97800646631739
+4,116.0,75.00,2246.0,14.0,74,2,26.0,28.192870049341337
+4,120.0,97.00,2489.0,15.0,74,3,24.0,23.33393520035609
+4,108.0,93.00,2391.0,15.5,74,3,26.0,25.445273952556004
+4,79.0,67.00,2000.0,16.0,74,2,31.0,35.56465031149246
+6,225.0,95.00,3264.0,16.0,75,1,19.0,18.150563443287634
+6,250.0,105.0,3459.0,16.0,75,1,18.0,16.95055998036272
+6,250.0,72.00,3432.0,21.0,75,1,15.0,16.432101773623355
+6,250.0,72.00,3158.0,19.5,75,1,15.0,16.72202626217873
+8,400.0,170.0,4668.0,11.5,75,1,16.0,14.675821181548951
+8,350.0,145.0,4440.0,14.0,75,1,15.0,14.45419201555731
+8,318.0,150.0,4498.0,14.5,75,1,16.0,14.354662258231306
+8,351.0,148.0,4657.0,13.5,75,1,14.0,14.07802211023864
+6,231.0,110.0,3907.0,21.0,75,1,17.0,16.422726575190747
+6,250.0,105.0,3897.0,18.5,75,1,16.0,15.526268042166263
+6,258.0,110.0,3730.0,19.0,75,1,15.0,16.310131605605687
+6,225.0,95.00,3785.0,19.0,75,1,18.0,16.371236988160177
+6,231.0,110.0,3039.0,15.0,75,1,21.0,19.182501536991843
+8,262.0,110.0,3221.0,13.5,75,1,20.0,18.094419278000096
+8,302.0,129.0,3169.0,12.0,75,1,13.0,16.055417977943737
+4,97.0,75.00,2171.0,16.0,75,3,29.0,28.71935945141869
+4,140.0,83.00,2639.0,17.0,75,1,23.0,24.258792465816725
+6,232.0,100.0,2914.0,16.0,75,1,20.0,19.459142932434663
+4,140.0,78.00,2592.0,18.5,75,1,23.0,23.67401742769011
+4,134.0,96.00,2702.0,13.5,75,3,24.0,25.181815154846852
+4,90.0,71.00,2223.0,16.5,75,2,25.0,27.484617562939835
+4,119.0,97.00,2545.0,17.0,75,3,24.0,23.669226653710393
+6,171.0,97.00,2984.0,14.5,75,1,18.0,20.233580869870178
+4,90.0,70.00,1937.0,14.0,75,2,29.0,29.969626949900476
+6,232.0,90.00,3211.0,17.0,75,1,19.0,19.07881915792574
+4,115.0,95.00,2694.0,15.0,75,2,23.0,22.88813141831251
+4,120.0,88.00,2957.0,17.0,75,2,23.0,22.95510599920703
+4,121.0,98.00,2945.0,14.5,75,2,22.0,20.885552103141446
+4,121.0,115.0,2671.0,13.5,75,2,25.0,23.95037548158147
+4,91.0,53.00,1795.0,17.5,75,3,33.0,28.752745477518395
+4,107.0,86.00,2464.0,15.5,76,2,28.0,25.047796253620568
+4,116.0,81.00,2220.0,16.9,76,2,25.0,25.655582363326502
+4,140.0,92.00,2572.0,14.9,76,1,25.0,25.06410096354108
+4,98.0,79.00,2255.0,17.7,76,1,26.0,26.12595992927592
+4,101.0,83.00,2202.0,15.3,76,2,27.0,26.731398422740575
+8,305.0,140.0,4215.0,13.0,76,1,17.5,15.324087833959068
+8,318.0,150.0,4190.0,13.0,76,1,16.0,15.538831753173486
+8,304.0,120.0,3962.0,13.9,76,1,15.5,15.506031727331658
+8,351.0,152.0,4215.0,12.8,76,1,14.5,15.56610495871359
+6,225.0,100.0,3233.0,15.4,76,1,22.0,19.877212596494854
+6,250.0,105.0,3353.0,14.5,76,1,22.0,19.23434144446747
+6,200.0,81.00,3012.0,17.6,76,1,24.0,20.922930918294345
+6,232.0,90.00,3085.0,17.6,76,1,22.5,20.49874938539158
+4,85.0,52.00,2035.0,22.2,76,1,29.0,29.55574669268143
+4,98.0,60.00,2164.0,22.1,76,1,24.5,29.380227490187057
+4,90.0,70.00,1937.0,14.2,76,2,29.0,30.29763857349054
+4,91.0,53.00,1795.0,17.4,76,3,33.0,28.719979599334742
+6,225.0,100.0,3651.0,17.7,76,1,20.0,19.18926151713551
+6,250.0,78.00,3574.0,21.0,76,1,18.0,18.693726149494104
+6,250.0,110.0,3645.0,16.2,76,1,18.5,18.222793100625868
+6,258.0,95.00,3193.0,17.8,76,1,17.5,18.44352419487775
+4,97.0,71.00,1825.0,12.2,76,2,29.5,29.431058489620224
+4,85.0,70.00,1990.0,17.0,76,3,32.0,32.459847752368894
+4,97.0,75.00,2155.0,16.4,76,3,28.0,28.69463557890584
+4,140.0,72.00,2565.0,13.6,76,1,26.5,24.709537970299326
+4,130.0,102.0,3150.0,15.7,76,2,20.0,20.49065816614162
+8,318.0,150.0,3940.0,13.2,76,1,13.0,15.48659784698173
+4,120.0,88.00,3270.0,21.9,76,2,19.0,21.739181344776462
+6,156.0,108.0,2930.0,15.5,76,3,19.0,21.321657570104467
+6,168.0,120.0,3820.0,16.7,76,2,16.5,18.41868808000314
+8,350.0,180.0,4380.0,12.1,76,1,16.5,15.439895282391651
+8,350.0,145.0,4055.0,12.0,76,1,13.0,15.275712476175165
+8,302.0,130.0,3870.0,15.0,76,1,13.0,15.08358539525977
+8,318.0,150.0,3755.0,14.0,76,1,13.0,15.300532883610606
+4,98.0,68.00,2045.0,18.5,77,3,31.5,29.658442248333536
+4,111.0,80.00,2155.0,14.8,77,1,30.0,29.33602716439095
+4,79.0,58.00,1825.0,18.6,77,2,36.0,32.97115488237283
+4,122.0,96.00,2300.0,15.5,77,1,25.5,26.639769625038653
+4,85.0,70.00,1945.0,16.8,77,3,33.5,32.99419979521502
+8,305.0,145.0,3880.0,12.5,77,1,17.5,16.068293712920823
+8,260.0,110.0,4060.0,19.0,77,1,17.0,17.086923416032032
+8,318.0,145.0,4140.0,13.7,77,1,15.5,15.68801089328068
+8,302.0,130.0,4295.0,14.9,77,1,15.0,15.373737216740258
+6,250.0,110.0,3520.0,16.4,77,1,17.5,18.54798621876747
+6,231.0,105.0,3425.0,16.9,77,1,20.5,19.658055650320215
+6,225.0,100.0,3630.0,17.7,77,1,19.0,19.42717943242424
+6,250.0,98.00,3525.0,19.0,77,1,18.5,18.18604906310617
+8,400.0,180.0,4220.0,11.1,77,1,16.0,15.662623828195935
+8,350.0,170.0,4165.0,11.4,77,1,15.5,15.75995001638552
+8,400.0,190.0,4325.0,12.2,77,1,15.5,15.447879908981516
+8,351.0,149.0,4335.0,14.5,77,1,16.0,15.729880181761065
+4,97.0,78.00,1940.0,14.5,77,2,29.0,29.539010469862095
+4,151.0,88.00,2740.0,16.0,77,1,24.5,25.09932075999005
+4,97.0,75.00,2265.0,18.2,77,3,26.0,28.47041665029408
+4,140.0,89.00,2755.0,15.8,77,1,25.5,24.097734619584916
+4,98.0,63.00,2051.0,17.0,77,1,30.5,30.688320904940067
+4,98.0,83.00,2075.0,15.9,77,1,33.5,28.79652078789174
+4,97.0,67.00,1985.0,16.4,77,3,30.0,35.516104556562865
+4,97.0,78.00,2190.0,14.1,77,2,30.5,28.834923412144402
+6,146.0,97.00,2815.0,14.5,77,3,22.0,21.89917690638142
+4,121.0,110.0,2600.0,12.8,77,2,21.5,23.49606609480152
+3,80.0,110.0,2720.0,13.5,77,3,21.5,22.128329645864532
+4,90.0,48.00,1985.0,21.5,78,2,43.1,42.25438022555937
+4,98.0,66.00,1800.0,14.4,78,1,36.1,34.173408180412785
+4,78.0,52.00,1985.0,19.4,78,3,32.8,34.1915093026189
+4,85.0,70.00,2070.0,18.6,78,3,39.4,37.365969036377535
+4,91.0,60.00,1800.0,16.4,78,3,36.1,33.52201574439778
+8,260.0,110.0,3365.0,15.5,78,1,19.9,19.982099552113322
+8,318.0,140.0,3735.0,13.2,78,1,19.4,18.075284754890998
+8,302.0,139.0,3570.0,12.8,78,1,20.2,18.798402601057838
+6,231.0,105.0,3535.0,19.2,78,1,19.2,19.205158715104854
+6,200.0,95.00,3155.0,18.2,78,1,20.5,19.71053694817312
+6,200.0,85.00,2965.0,15.8,78,1,20.2,21.24643884303184
+4,140.0,88.00,2720.0,15.4,78,1,25.1,26.138288316496904
+6,225.0,100.0,3430.0,17.2,78,1,20.5,20.30986021311325
+6,232.0,90.00,3210.0,17.2,78,1,19.4,20.48886921489166
+6,231.0,105.0,3380.0,15.8,78,1,20.6,19.941138995996596
+6,200.0,85.00,3070.0,16.7,78,1,20.8,20.822257310129075
+6,225.0,110.0,3620.0,18.7,78,1,18.6,19.377662089696994
+6,258.0,120.0,3410.0,15.1,78,1,18.1,19.815004087827166
+8,305.0,145.0,3425.0,13.2,78,1,19.2,19.55040814331093
+6,231.0,165.0,3445.0,13.4,78,1,17.7,19.07604508303332
+8,302.0,139.0,3205.0,11.2,78,1,18.1,19.146090156448206
+8,318.0,140.0,4080.0,13.7,78,1,17.5,16.635767181976433
+4,98.0,68.00,2155.0,16.5,78,1,30.0,31.172159711975798
+4,134.0,95.00,2560.0,14.2,78,3,27.5,25.871315770961512
+4,119.0,97.00,2300.0,14.7,78,3,27.2,25.398600959609016
+4,105.0,75.00,2230.0,14.5,78,1,30.9,31.59430174776567
+4,134.0,95.00,2515.0,14.8,78,3,21.1,25.00435067371597
+4,156.0,105.0,2745.0,16.7,78,1,23.2,23.30512412172436
+4,151.0,85.00,2855.0,17.6,78,1,23.8,22.496719576329244
+4,119.0,97.00,2405.0,14.9,78,3,23.9,25.398600959609016
+5,131.0,103.0,2830.0,15.9,78,2,20.3,21.71554908156222
+6,163.0,125.0,3140.0,13.6,78,2,17.0,21.283788373058936
+4,121.0,115.0,2795.0,15.7,78,2,21.6,22.707159121443734
+6,163.0,133.0,3410.0,15.8,78,2,16.2,20.87938539199248
+4,89.0,71.00,1990.0,14.9,78,2,31.5,31.95946683883547
+4,98.0,68.00,2135.0,16.6,78,3,29.5,31.249096306479384
+6,231.0,115.0,3245.0,15.4,79,1,21.5,20.70269170266688
+6,200.0,85.00,2990.0,18.2,79,1,19.8,21.337245336413545
+4,140.0,88.00,2890.0,17.3,79,1,22.3,25.341050443174613
+6,232.0,90.00,3265.0,18.2,79,1,20.2,20.687411544933354
+6,225.0,110.0,3360.0,16.6,79,1,20.6,20.374365548594326
+8,305.0,130.0,3840.0,15.4,79,1,17.0,16.765932598832308
+8,302.0,129.0,3725.0,13.4,79,1,17.6,18.860386334532947
+8,351.0,138.0,3955.0,13.2,79,1,16.5,17.41103906152409
+8,318.0,135.0,3830.0,15.2,79,1,18.2,17.33689636928283
+8,350.0,155.0,4360.0,14.9,79,1,16.9,16.754066707109395
+8,351.0,142.0,4054.0,14.3,79,1,15.5,16.75548638631812
+8,267.0,125.0,3605.0,15.0,79,1,19.2,19.449924171416857
+8,360.0,150.0,3940.0,13.0,79,1,18.5,17.054819210287985
+4,89.0,71.00,1925.0,14.0,79,2,31.9,33.26224083352532
+4,86.0,65.00,1975.0,15.2,79,3,34.1,40.71965631082923
+4,98.0,80.00,1915.0,14.4,79,1,35.7,34.39111541857416
+4,121.0,80.00,2670.0,15.0,79,1,27.4,28.307380550743847
+5,183.0,77.00,3530.0,20.1,79,2,25.4,24.364887729624773
+8,350.0,125.0,3900.0,17.4,79,1,23.0,17.315255114849077
+4,141.0,71.00,3190.0,24.8,79,2,27.2,26.573422356016504
+8,260.0,90.00,3420.0,22.2,79,1,23.9,22.629747721166307
+4,105.0,70.00,2200.0,13.2,79,1,34.2,34.74536319391144
+4,105.0,70.00,2150.0,14.9,79,1,34.5,33.868550510189515
+4,85.0,65.00,2020.0,19.2,79,3,31.8,41.37512937079361
+4,91.0,69.00,2130.0,14.7,79,2,37.3,36.341018005495656
+4,151.0,90.00,2670.0,16.0,79,1,28.4,27.15804851087683
+6,173.0,115.0,2595.0,11.3,79,1,28.8,27.443103253421214
+6,173.0,115.0,2700.0,12.9,79,1,26.8,27.443103253421214
+4,151.0,90.00,2556.0,13.2,79,1,33.5,31.519920039244518
+4,98.0,76.00,2144.0,14.7,80,2,41.5,36.41667611098805
+4,89.0,60.00,1968.0,18.8,80,3,38.1,38.20717539473338
+4,98.0,70.00,2120.0,15.5,80,1,32.1,33.45611330442045
+4,86.0,65.00,2019.0,16.4,80,3,37.2,44.71711790481043
+4,151.0,90.00,2678.0,16.5,80,1,28.0,27.77014023034906
+4,140.0,88.00,2870.0,18.1,80,1,26.4,26.42596241826284
+4,151.0,90.00,3003.0,20.1,80,1,24.3,25.266926839444174
+6,225.0,90.00,3381.0,18.7,80,1,19.1,21.54170328835083
+4,97.0,78.00,2188.0,15.8,80,2,34.3,34.42767626447314
+4,134.0,90.00,2711.0,15.5,80,3,29.8,29.07373563198588
+4,120.0,75.00,2542.0,17.5,80,3,31.3,33.44754235678454
+4,119.0,92.00,2434.0,15.0,80,3,37.0,35.19787954340858
+4,108.0,75.00,2265.0,15.2,80,3,32.2,33.84812318403143
+4,86.0,65.00,2110.0,17.9,80,3,46.6,46.707884917181644
+4,156.0,105.0,2800.0,14.4,80,1,27.9,24.31723125809084
+4,85.0,65.00,2110.0,19.2,80,3,40.8,44.61336901721456
+4,90.0,48.00,2085.0,21.7,80,2,44.3,44.04873495309377
+4,90.0,48.00,2335.0,23.7,80,2,43.4,43.292619102967045
+5,121.0,67.00,2950.0,19.9,80,2,36.4,38.58419358793866
+4,146.0,67.00,3250.0,21.8,80,2,30.0,33.302919568489195
+4,91.0,67.00,1850.0,13.8,80,3,44.6,46.14809114751762
+4,85.0,?,1835.0,17.3,80,2,40.9,39.46832996122683
+4,97.0,67.00,2145.0,18.0,80,3,33.8,39.84775137517216
+4,89.0,62.00,1845.0,15.3,80,2,29.8,33.85934307396341
+6,168.0,132.0,2910.0,11.4,80,3,32.7,31.507629280182147
+3,70.0,100.0,2420.0,12.5,80,3,23.7,34.252160614856145
+4,122.0,88.00,2500.0,15.1,80,2,35.0,33.332770679285915
+4,140.0,?,2905.0,14.3,80,1,23.6,26.285247006183862
+4,107.0,72.00,2290.0,17.0,80,3,32.4,30.426218239560978
+4,135.0,84.00,2490.0,15.7,81,1,27.2,29.179564839088524
+4,151.0,84.00,2635.0,16.4,81,1,26.6,27.42833395630787
+4,156.0,92.00,2620.0,14.4,81,1,25.8,27.767804618782556
+6,173.0,110.0,2725.0,12.6,81,1,23.5,28.723699283328042
+4,135.0,84.00,2385.0,12.9,81,1,30.0,30.848247701381503
+4,79.0,58.00,1755.0,16.9,81,3,39.1,38.61796233140963
+4,86.0,64.00,1875.0,16.4,81,1,39.0,38.94528588928582
+4,81.0,60.00,1760.0,16.1,81,3,35.1,37.52298925893553
+4,97.0,67.00,2065.0,17.8,81,3,32.3,38.59838821632358
+4,85.0,65.00,1975.0,19.4,81,3,37.0,43.206974575255295
+4,89.0,62.00,2050.0,17.3,81,3,37.7,36.9578122358848
+4,91.0,68.00,1985.0,16.0,81,3,34.1,35.72250991701692
+4,105.0,63.00,2215.0,14.9,81,1,34.7,35.13310280648725
+4,98.0,65.00,2045.0,16.2,81,1,34.4,38.20355220541596
+4,98.0,65.00,2380.0,20.7,81,1,29.9,39.547568414776244
+4,105.0,74.00,2190.0,14.2,81,2,33.0,37.308472648509166
+4,100.0,?,2320.0,15.8,81,2,34.5,33.30558577268826
+4,107.0,75.00,2210.0,14.4,81,3,33.7,34.797069863275595
+4,108.0,75.00,2350.0,16.8,81,3,32.4,33.06724783302362
+4,119.0,100.0,2615.0,14.8,81,3,32.9,31.56081067650422
+4,120.0,74.00,2635.0,18.3,81,3,31.6,32.246580792714646
+4,141.0,80.00,3230.0,20.4,81,2,28.1,29.09190981295705
+6,145.0,76.00,3160.0,19.6,81,2,30.7,29.356329134838397
+6,168.0,116.0,2900.0,12.6,81,3,25.4,26.199966865873854
+6,146.0,120.0,2930.0,13.8,81,3,24.2,26.362133705892468
+6,231.0,110.0,3415.0,15.8,81,1,22.4,21.563875658680523
+8,350.0,105.0,3725.0,19.0,81,1,26.6,25.045716233576467
+6,200.0,88.00,3060.0,17.1,81,1,20.2,21.068486942801076
+6,225.0,85.00,3465.0,16.6,81,1,17.6,20.26876649009557
+4,112.0,88.00,2605.0,19.6,82,1,28.0,29.84512626786672
+4,112.0,88.00,2640.0,18.6,82,1,27.0,29.84512626786672
+4,112.0,88.00,2395.0,18.0,82,1,34.0,33.5742600443853
+4,112.0,85.00,2575.0,16.2,82,1,31.0,29.023945732632264
+4,135.0,84.00,2525.0,16.0,82,1,29.0,31.419670627228616
+4,151.0,90.00,2735.0,18.0,82,1,27.0,27.892469792324
+4,140.0,92.00,2865.0,16.4,82,1,24.0,27.239415939551016
+4,151.0,?,3035.0,20.5,82,1,23.0,25.196343356743597
+4,105.0,74.00,1980.0,15.3,82,2,36.0,35.440784799991576
+4,91.0,68.00,2025.0,18.2,82,3,37.0,37.32728185546619
+4,91.0,68.00,1970.0,17.6,82,3,31.0,37.1735402776545
+4,105.0,63.00,2125.0,14.7,82,1,38.0,37.73595783104049
+4,98.0,70.00,2125.0,17.3,82,1,36.0,35.01865903545254
+4,120.0,88.00,2160.0,14.5,82,3,36.0,34.738059281798506
+4,107.0,75.00,2205.0,14.5,82,3,36.0,36.42428637384083
+4,108.0,70.00,2245.0,16.9,82,3,34.0,34.25449780118571
+4,91.0,67.00,1965.0,15.0,82,3,38.0,43.01433472159814
+4,91.0,67.00,1965.0,15.7,82,3,32.0,41.20814622179627
+4,91.0,67.00,1995.0,16.2,82,3,38.0,42.36134240378185
+6,181.0,110.0,2945.0,16.4,82,1,25.0,31.24070954535207
+6,262.0,85.00,3015.0,17.0,82,1,38.0,31.808149864254364
+4,156.0,92.00,2585.0,14.5,82,1,26.0,28.31246584775401
+6,232.0,112.0,2835.0,14.7,82,1,22.0,33.116830704987294
+4,144.0,96.00,2665.0,13.9,82,3,32.0,31.70802240889664
+4,135.0,84.00,2370.0,13.0,82,1,36.0,33.09672469004753
+4,151.0,90.00,2950.0,17.3,82,1,27.0,26.101102668325314
+4,140.0,86.00,2790.0,15.6,82,1,27.0,25.674883798281442
+4,97.0,52.00,2130.0,24.6,82,2,44.0,34.812667353374415
+4,135.0,84.00,2295.0,11.6,82,1,32.0,32.77687416139507
+4,120.0,79.00,2625.0,18.6,82,1,28.0,27.852079937244017
+4,119.0,82.00,2720.0,19.4,82,1,31.0,30.026254460876412
diff -r 000000000000 -r 1f20fe57fdee utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py Wed Dec 11 04:59:43 2024 +0000
@@ -0,0 +1,157 @@
+import base64
+import logging
+
+# Import-time side effect: request DEBUG-level configuration of the root
+# logger, then create the logger used by this module.
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+def get_html_template():
+ """Return the fixed string literal that carries the report title.
+
+ NOTE(review): in this copy of the patch the literal contains only the
+ text "Model Training Report" surrounded by blank lines; any HTML markup
+ appears to have been stripped during extraction. Confirm the real
+ content against the repository before relying on it.
+ """
+ return """
+
+
+ Model Training Report
+
+
+
+
+ """
+
+
+def get_html_closing():
+ """Return a fixed string literal.
+
+ NOTE(review): presumably the markup that closes the document opened by
+ ``get_html_template``; in this copy of the patch the literal holds only
+ whitespace, so the tags appear to have been stripped during extraction.
+ Confirm the real content against the repository.
+ """
+ return """
+
+
+
+
+ """
+
+
+def customize_figure_layout(fig, margin_dict=None):
+    """
+    Apply margin settings to a Plotly figure's layout.
+
+    Parameters:
+        fig (plotly.graph_objects.Figure): The figure to customize. It is
+            updated through ``fig.update_layout``.
+        margin_dict (dict, optional): Margin sizes forwarded as the
+            ``margin`` layout property.
+            Example: {'l': 10, 'r': 10, 't': 10, 'b': 10}
+            When None, 40 is used for all four sides.
+
+    Returns:
+        plotly.graph_objects.Figure: The same ``fig`` object passed in.
+    """
+    if margin_dict is None:
+        # Only None selects the defaults; an explicit (even empty) dict
+        # is forwarded unchanged.
+        margin_dict = {'l': 40, 'r': 40, 't': 40, 'b': 40}
+
+    fig.update_layout(margin=margin_dict)
+    return fig
+
+
+def add_plot_to_html(fig, include_plotlyjs=True):
+    """
+    Set report margins on ``fig`` and return its ``to_html`` output.
+
+    Parameters:
+        fig: Figure object; it is passed through
+            ``customize_figure_layout`` and must provide ``to_html``.
+        include_plotlyjs (bool): When truthy, ``"cdn"`` is passed to
+            ``to_html`` as ``include_plotlyjs``; otherwise ``False``.
+
+    Returns:
+        Whatever ``fig.to_html`` returns for ``full_html=False`` and
+        ``default_height=350``.
+    """
+    styled = customize_figure_layout(
+        fig,
+        margin_dict={'l': 40, 'r': 40, 't': 60, 'b': 60},
+    )
+    if include_plotlyjs:
+        plotlyjs_mode = "cdn"
+    else:
+        plotlyjs_mode = False
+    return styled.to_html(
+        full_html=False,
+        default_height=350,
+        include_plotlyjs=plotlyjs_mode,
+    )
+
+
+def add_hr_to_html():
+    """Return the HTML horizontal-rule tag as a string."""
+    # NOTE(review): the extracted patch had this literal split across two
+    # lines with the tag removed; "<hr>" is reconstructed from the function
+    # name. Confirm against the repository.
+    return "<hr>"
+
+
+def encode_image_to_base64(image_path):
+    """
+    Convert a file's contents to a base64 encoded string.
+
+    Parameters:
+        image_path: Path of the file to read; it is opened in binary mode.
+
+    Returns:
+        str: The base64 encoding of the file's bytes, decoded as UTF-8 text.
+
+    Raises:
+        OSError: Propagated from ``open`` if the file cannot be read.
+    """
+    # The context manager closes the file even if reading fails.
+    with open(image_path, "rb") as img_file:
+        return base64.b64encode(img_file.read()).decode("utf-8")