# HG changeset patch
# User goeckslab
# Date 1733893200 0
# Node ID 915447b14520dc6941fd4344327dcc21314d7429
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1
diff -r 000000000000 -r 915447b14520 base_model_trainer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/base_model_trainer.py Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,359 @@
+import base64
+import logging
+import os
+import tempfile
+from feature_importance import FeatureImportanceAnalyzer
+import h5py
+import joblib
+import numpy as np
+import pandas as pd
+from sklearn.metrics import average_precision_score
+from utils import get_html_closing, get_html_template
+LOG = logging.getLogger(__name__)
+class BaseModelTrainer:
+    def __init__(
+            self,
+            input_file,
+            target_col,
+            output_dir,
+            task_type,
+            random_seed,
+            test_file=None,
+            **kwargs):
+        """Record the run configuration and reset all per-run state.
+
+        Args:
+            input_file: Path of the training table read by ``load_data``.
+            target_col: 1-based position of the target column.
+            output_dir: Directory that receives the CSV/HTML outputs.
+            task_type: Task name; ``"classification"`` enables the
+                classification-only steps in ``train_model``.
+            random_seed: Passed to PyCaret as ``session_id``.
+            test_file: Optional path of a separate test table.
+            **kwargs: Extra options; each one becomes an attribute of
+                the same name and is later probed with ``hasattr``.
+        """
+        # The concrete experiment object is supplied by the subclass.
+        self.exp = None
+        self.input_file = input_file
+        self.target_col = target_col
+        self.output_dir = output_dir
+        self.task_type = task_type
+        self.random_seed = random_seed
+        # Chained assignment binds the targets left to right, so the
+        # attribute order is unchanged.
+        self.data = self.target = self.best_model = None
+        self.results = self.features_name = None
+        self.plots = {}
+        self.expaliner = None
+        self.plots_explainer_html = None
+        self.trees = []
+        # Optional settings are attached before the attributes below so
+        # that they can never override them.
+        for option_name, option_value in kwargs.items():
+            setattr(self, option_name, option_value)
+        self.setup_params = {}
+        self.test_file = test_file
+        self.test_data = None
+        LOG.info(f"Model kwargs: {self.__dict__}")
+    def load_data(self):
+        """Read the training (and optional test) table and clean it.
+
+        Steps: sniff the delimiter and read ``self.input_file``; replace
+        literal dots in column names with underscores; coerce numeric
+        columns; resolve the 1-based ``self.target_col`` to a column
+        name; fill or drop missing values according to
+        ``self.missing_value_strategy`` (median fill when the attribute
+        is absent); finally load ``self.test_file`` when given.
+
+        Raises:
+            IndexError: If ``target_col`` is outside ``1..n_columns``.
+        """
+        LOG.info(f"Loading data from {self.input_file}")
+        self.data = pd.read_csv(self.input_file, sep=None, engine='python')
+        # regex=False: with the regex default of older pandas versions
+        # the pattern '.' matches every character of the column name.
+        self.data.columns = self.data.columns.str.replace(
+            '.', '_', regex=False)
+        numeric_cols = self.data.select_dtypes(include=['number']).columns
+        non_numeric_cols = self.data.select_dtypes(exclude=['number']).columns
+        self.data[numeric_cols] = self.data[numeric_cols].apply(
+            pd.to_numeric, errors='coerce')
+        if len(non_numeric_cols) > 0:
+            LOG.info(f"Non-numeric columns found: {non_numeric_cols.tolist()}")
+        names = self.data.columns.to_list()
+        target_index = int(self.target_col) - 1
+        # Reject 0/negative positions explicitly: a plain list lookup
+        # would silently wrap around to the last column.
+        if not 0 <= target_index < len(names):
+            raise IndexError(
+                f"target_col {self.target_col} is out of range for "
+                f"{len(names)} columns")
+        self.target = names[target_index]
+        self.features_name = [name
+                              for i, name in enumerate(names)
+                              if i != target_index]
+        if hasattr(self, 'missing_value_strategy'):
+            if self.missing_value_strategy == 'mean':
+                self.data = self.data.fillna(
+                    self.data.mean(numeric_only=True))
+            elif self.missing_value_strategy == 'median':
+                self.data = self.data.fillna(
+                    self.data.median(numeric_only=True))
+            elif self.missing_value_strategy == 'drop':
+                self.data = self.data.dropna()
+        else:
+            # Default strategy if not specified
+            self.data = self.data.fillna(self.data.median(numeric_only=True))
+        if self.test_file:
+            LOG.info(f"Loading test data from {self.test_file}")
+            self.test_data = pd.read_csv(
+                self.test_file, sep=None, engine='python')
+            # Rename first: numeric_cols holds the already-renamed
+            # training names, so selecting before renaming fails for any
+            # header that contained a dot.
+            self.test_data.columns = self.test_data.columns.str.replace(
+                '.', '_', regex=False
+            )
+            self.test_data = self.test_data[numeric_cols].apply(
+                pd.to_numeric, errors='coerce')
+    def setup_pycaret(self):
+        """Assemble the PyCaret ``setup`` arguments and run ``setup``.
+
+        The fixed arguments are always passed. Optional ones are copied
+        from same-named attributes (set through the constructor kwargs)
+        only when the attribute exists and is not ``None``. The final
+        dictionary is kept in ``self.setup_params``.
+        """
+        LOG.info("Initializing PyCaret")
+        params = self.setup_params = {
+            'target': self.target,
+            'session_id': self.random_seed,
+            'html': True,
+            'log_experiment': False,
+            'system_log': False,
+            'index': False,
+        }
+        has_test_data = self.test_data is not None
+        if has_test_data:
+            params['test_data'] = self.test_data
+        # A train/test split ratio only applies without explicit test data.
+        train_size = getattr(self, 'train_size', None)
+        if train_size is not None and not has_test_data:
+            params['train_size'] = train_size
+        for option in ('normalize', 'feature_selection'):
+            value = getattr(self, option, None)
+            if value is not None:
+                params[option] = value
+        cross_validation = getattr(self, 'cross_validation', None)
+        # Only an explicit False is forwarded.
+        if cross_validation is False:
+            params['cross_validation'] = cross_validation
+        if cross_validation is not None and \
+                hasattr(self, 'cross_validation_folds'):
+            params['fold'] = self.cross_validation_folds
+        for option in ('remove_outliers', 'remove_multicollinearity',
+                       'polynomial_features', 'fix_imbalance'):
+            value = getattr(self, option, None)
+            if value is not None:
+                params[option] = value
+        LOG.info(params)
+        self.exp.setup(self.data, **params)
+    def train_model(self):
+        """Compare candidate models and score the winner on the test set.
+
+        Sets ``self.best_model``, ``self.results`` (comparison table)
+        and ``self.test_result_df`` (table pulled after
+        ``predict_model``). For classification a weighted PR-AUC metric
+        is registered first and the ``AUC`` column of both tables is
+        renamed to ``ROC-AUC``.
+        """
+        LOG.info("Training and selecting the best model")
+        is_classification = self.task_type == "classification"
+        auc_rename = {'AUC': 'ROC-AUC'}
+        if is_classification:
+            average_displayed = "Weighted"
+            metric_label = f'PR-AUC-{average_displayed}'
+            self.exp.add_metric(
+                id=metric_label,
+                name=metric_label,
+                target='pred_proba',
+                score_func=average_precision_score,
+                average='weighted',
+            )
+        candidate_models = getattr(self, 'models', None)
+        if candidate_models is None:
+            self.best_model = self.exp.compare_models()
+        else:
+            self.best_model = self.exp.compare_models(
+                include=candidate_models)
+        self.results = self.exp.pull()
+        if is_classification:
+            self.results.rename(columns=auc_rename, inplace=True)
+        # The return value is not needed; the metrics are pulled below.
+        self.exp.predict_model(self.best_model)
+        self.test_result_df = self.exp.pull()
+        if is_classification:
+            self.test_result_df.rename(columns=auc_rename, inplace=True)
+    def save_model(self):
+        """Serialize ``self.best_model`` into ``pycaret_model.h5``.
+
+        The model is dumped with joblib and stored as one opaque byte
+        blob in the HDF5 dataset ``model``. The file is written to the
+        current working directory.
+        """
+        import io
+        # Serialize in memory: the previous implementation created a
+        # NamedTemporaryFile(delete=False) that was never removed and
+        # wrote to it by name while it was still open.
+        buffer = io.BytesIO()
+        joblib.dump(self.best_model, buffer)
+        hdf5_model_path = "pycaret_model.h5"
+        with h5py.File(hdf5_model_path, 'w') as f:
+            f.create_dataset('model', data=np.void(buffer.getvalue()))
+    def generate_plots(self):
+        """Hook for subclasses; the base implementation always raises.
+
+        Raises:
+            NotImplementedError: Always.
+        """
+        message = "Subclasses should implement this method"
+        raise NotImplementedError(message)
+    def encode_image_to_base64(self, img_path):
+        """Return the bytes of the file at ``img_path`` as base64 text."""
+        with open(img_path, 'rb') as img_file:
+            raw_bytes = img_file.read()
+        encoded = base64.b64encode(raw_bytes)
+        return encoded.decode('utf-8')
+ def save_html_report(self):
+ # Write the CSV side outputs and the combined HTML report into
+ # self.output_dir: best_model.csv, comparison_results.csv,
+ # test_results.csv and comparison_result.html.
+ # NOTE(review): the markup inside the f-strings below appears to have
+ # been stripped from this copy and one string literal is unterminated;
+ # confirm against the upstream file before editing this method.
+ LOG.info("Saving HTML report")
+ model_name = type(self.best_model).__name__
+ # Keys that are not listed in the "Setup Parameters" table.
+ excluded_params = ['html', 'log_experiment', 'system_log', 'test_data']
+ filtered_setup_params = {
+ k: v
+ for k, v in self.setup_params.items() if k not in excluded_params
+ }
+ setup_params_table = pd.DataFrame(
+ list(filtered_setup_params.items()),
+ columns=['Parameter', 'Value'])
+ # Hyper-parameters of the selected estimator, also saved as CSV.
+ best_model_params = pd.DataFrame(
+ self.best_model.get_params().items(),
+ columns=['Parameter', 'Value'])
+ best_model_params.to_csv(
+ os.path.join(self.output_dir, 'best_model.csv'),
+ index=False)
+ self.results.to_csv(os.path.join(
+ self.output_dir, "comparison_results.csv"))
+ self.test_result_df.to_csv(os.path.join(
+ self.output_dir, "test_results.csv"))
+ # Each saved plot file is base64-encoded for embedding in the page.
+ plots_html = ""
+ length = len(self.plots)
+ for i, (plot_name, plot_path) in enumerate(self.plots.items()):
+ encoded_image = self.encode_image_to_base64(plot_path)
+ plots_html += f"""

+ """
+ if i < length - 1:
+ plots_html += "
+ tree_plots = ""
+ for i, tree in enumerate(self.trees):
+ if tree:
+ tree_plots += f"""
Tree {i+1}

+ """
+ # A separate FeatureImportanceAnalyzer run on the in-memory data
+ # supplies the feature-importance section; run() returns HTML text.
+ analyzer = FeatureImportanceAnalyzer(
+ data=self.data,
+ target_col=self.target_col,
+ task_type=self.task_type,
+ output_dir=self.output_dir)
+ feature_importance_html = analyzer.run()
+ html_content = f"""
+ {get_html_template()}
+ PyCaret Model Training Report
+ Setup & Best Model
+ Best Model Plots
+ Feature Importance
+ Explainer
Setup Parameters
+ Parameter | Value |
+ {setup_params_table.to_html(
+ index=False, header=False, classes='table')}
If you want to know all the experiment setup parameters,
+ please check the PyCaret documentation for
+ the classification/regression exp
Best Model: {model_name}
+ Parameter | Value |
+ {best_model_params.to_html(
+ index=False, header=False, classes='table')}
Comparison Results on the Cross-Validation Set
+ {self.results.to_html(index=False, classes='table')}
Results on the Test Set for the best model
+ {self.test_result_df.to_html(index=False, classes='table')}
Best Model Plots on the testing set
+ {plots_html}
+ {feature_importance_html}
+ {self.plots_explainer_html}
+ {tree_plots}
+ {get_html_closing()}
+ """
+ # The assembled page is written as comparison_result.html.
+ with open(os.path.join(
+ self.output_dir, "comparison_result.html"), "w") as file:
+ file.write(html_content)
+    def save_dashboard(self):
+        """Hook for subclasses; the base implementation always raises.
+
+        Raises:
+            NotImplementedError: Always.
+        """
+        message = "Subclasses should implement this method"
+        raise NotImplementedError(message)
+    def generate_plots_explainer(self):
+        """Hook for subclasses; the base implementation always raises.
+
+        Raises:
+            NotImplementedError: Always.
+        """
+        message = "Subclasses should implement this method"
+        raise NotImplementedError(message)
+ # not working now
+    def generate_tree_plots(self):
+        """Collect one encoded decision-tree figure per tree of the model.
+
+        Only random-forest and XGBoost estimators are handled; for any
+        other best model the method logs a message and returns without
+        touching ``self.trees``. Failures while building the figures are
+        logged, not raised.
+        """
+        from sklearn.ensemble import RandomForestClassifier, \
+            RandomForestRegressor
+        from xgboost import XGBClassifier, XGBRegressor
+        from explainerdashboard.explainers import RandomForestExplainer
+        LOG.info("Generating tree plots")
+        is_rf = isinstance(
+            self.best_model,
+            (RandomForestClassifier, RandomForestRegressor))
+        is_xgb = isinstance(self.best_model, (XGBClassifier, XGBRegressor))
+        # Without this guard ``num_trees`` stayed unbound for every other
+        # model type and the resulting NameError was logged as an error.
+        if not (is_rf or is_xgb):
+            LOG.info("Best model is not tree-based; skipping tree plots")
+            return
+        X_test = self.exp.X_test_transformed.copy()
+        y_test = self.exp.y_test_transformed
+        try:
+            if is_rf:
+                num_trees = self.best_model.n_estimators
+            else:
+                num_trees = len(self.best_model.get_booster().get_dump())
+            # NOTE(review): RandomForestExplainer is also used for XGBoost
+            # models, as in the original code; confirm this is intended.
+            explainer = RandomForestExplainer(self.best_model, X_test, y_test)
+            for i in range(num_trees):
+                fig = explainer.decisiontree_encoded(tree_idx=i, index=0)
+                LOG.info(f"Tree {i+1}")
+                LOG.info(fig)
+                self.trees.append(fig)
+        except Exception as e:
+            LOG.error(f"Error generating tree plots: {e}")
+    def run(self):
+        """Run the whole training workflow, one stage after another.
+
+        Stages: load data, set up PyCaret, train, save the model, create
+        plots, create explainer plots, create tree plots and write the
+        HTML report. ``save_dashboard`` is intentionally not called.
+        """
+        stages = (
+            self.load_data,
+            self.setup_pycaret,
+            self.train_model,
+            self.save_model,
+            self.generate_plots,
+            self.generate_plots_explainer,
+            self.generate_tree_plots,
+            self.save_html_report,
+        )
+        for stage in stages:
+            stage()
+        # self.save_dashboard()
diff -r 000000000000 -r 915447b14520 dashboard.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dashboard.py Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,159 @@
+import logging
+from typing import Any, Dict, Optional
+from pycaret.utils.generic import get_label_encoder
+LOG = logging.getLogger(__name__)
+def generate_classifier_explainer_dashboard(
+        exp,
+        estimator,
+        display_format: str = "dash",
+        dashboard_kwargs: Optional[Dict[str, Any]] = None,
+        run_kwargs: Optional[Dict[str, Any]] = None,
+        **kwargs,):
+    """Build an ``ExplainerDashboard`` for a trained classifier.
+
+    Adapted from ``pycaret.classification.oop.dashboard()``. The
+    dashboard is created but not started.
+
+    Args:
+        exp: Experiment object; its ``pipeline``, ``X_test_transformed``
+            and ``y_test_transformed`` attributes are read.
+        estimator: Trained scikit-learn compatible model.
+        display_format: Passed to ``ExplainerDashboard`` as ``mode``
+            ('dash', 'inline', 'jupyterlab' or 'external').
+        dashboard_kwargs: Extra keyword arguments for
+            ``ExplainerDashboard``.
+        run_kwargs: Accepted for signature compatibility; not used by
+            this function.
+        **kwargs: Extra keyword arguments for ``ClassifierExplainer``.
+
+    Returns:
+        ExplainerDashboard
+    """
+    from explainerdashboard import ClassifierExplainer, ExplainerDashboard
+    extra_dashboard_args = dashboard_kwargs if dashboard_kwargs else {}
+    label_encoder = get_label_encoder(exp.pipeline)
+    class_labels = list(label_encoder.classes_) if label_encoder else None
+    # Replace the characters dash does not accept in column names
+    # (`.`, `{`, `}`) with a double underscore.
+    unsafe_chars = str.maketrans({".": "__", "{": "__", "}": "__"})
+    X_test_df = exp.X_test_transformed.copy()
+    LOG.info(X_test_df)
+    X_test_df.columns = [
+        column.translate(unsafe_chars) for column in X_test_df.columns
+    ]
+    explainer = ClassifierExplainer(
+        estimator,
+        X_test_df,
+        exp.y_test_transformed,
+        labels=class_labels,
+        **kwargs
+    )
+    return ExplainerDashboard(
+        explainer,
+        mode=display_format,
+        contributions=False,
+        whatif=False,
+        **extra_dashboard_args
+    )
+def generate_regression_explainer_dashboard(
+        exp,
+        estimator,
+        display_format: str = "dash",
+        dashboard_kwargs: Optional[Dict[str, Any]] = None,
+        run_kwargs: Optional[Dict[str, Any]] = None,
+        **kwargs,):
+    """Build an ``ExplainerDashboard`` for a trained regressor.
+
+    Adapted from ``pycaret.regression.oop.dashboard()``. The dashboard
+    is created but not started.
+
+    Args:
+        exp: Experiment object; its ``X_test_transformed`` and
+            ``y_test_transformed`` attributes are read.
+        estimator: Trained scikit-learn compatible model.
+        display_format: Passed to ``ExplainerDashboard`` as ``mode``
+            ('dash', 'inline', 'jupyterlab' or 'external').
+        dashboard_kwargs: Extra keyword arguments for
+            ``ExplainerDashboard``.
+        run_kwargs: Accepted for signature compatibility; not used by
+            this function.
+        **kwargs: Extra keyword arguments for ``RegressionExplainer``.
+
+    Returns:
+        ExplainerDashboard
+    """
+    from explainerdashboard import ExplainerDashboard, RegressionExplainer
+    extra_dashboard_args = dashboard_kwargs if dashboard_kwargs else {}
+    # Replace the characters dash does not accept in column names
+    # (`.`, `{`, `}`) with a double underscore.
+    unsafe_chars = str.maketrans({".": "__", "{": "__", "}": "__"})
+    X_test_df = exp.X_test_transformed.copy()
+    X_test_df.columns = [
+        column.translate(unsafe_chars) for column in X_test_df.columns
+    ]
+    explainer = RegressionExplainer(
+        estimator,
+        X_test_df,
+        exp.y_test_transformed,
+        **kwargs
+    )
+    return ExplainerDashboard(
+        explainer,
+        mode=display_format,
+        contributions=False,
+        whatif=False,
+        shap_interaction=False,
+        decision_trees=False,
+        **extra_dashboard_args
+    )
diff -r 000000000000 -r 915447b14520 feature_importance.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/feature_importance.py Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,171 @@
+import base64
+import logging
+import os
+import matplotlib.pyplot as plt
+import pandas as pd
+from pycaret.classification import ClassificationExperiment
+from pycaret.regression import RegressionExperiment
+LOG = logging.getLogger(__name__)
+class FeatureImportanceAnalyzer:
+    def __init__(
+            self,
+            task_type,
+            output_dir,
+            data_path=None,
+            data=None,
+            target_col=None):
+        """Prepare the data and the PyCaret experiment for the analysis.
+
+        Args:
+            task_type: ``'classification'`` selects a
+                ``ClassificationExperiment``; any other value a
+                ``RegressionExperiment``.
+            output_dir: Directory where the plot images are saved.
+            data_path: Table to read when ``data`` is not given.
+            data: Already loaded DataFrame; used as is when provided.
+            target_col: 1-based position of the target column.
+        """
+        # Set on both paths; previously only the file-loading branch
+        # defined this attribute.
+        self.target_col = target_col
+        if data is not None:
+            self.data = data
+            LOG.info("Data loaded from memory")
+        else:
+            self.data = pd.read_csv(data_path, sep=None, engine='python')
+            # regex=False: with the regex default of older pandas
+            # versions '.' matches every character of the column name.
+            self.data.columns = self.data.columns.str.replace(
+                '.', '_', regex=False)
+            self.data = self.data.fillna(self.data.median(numeric_only=True))
+        self.task_type = task_type
+        self.target = self.data.columns[int(target_col) - 1]
+        self.exp = ClassificationExperiment() \
+            if task_type == 'classification' \
+            else RegressionExperiment()
+        self.plots = {}
+        self.output_dir = output_dir
+    def setup_pycaret(self):
+        """Run PyCaret ``setup`` on ``self.data`` with fixed settings.
+
+        The session id is fixed at 123 and experiment/system logging are
+        switched off.
+        """
+        LOG.info("Initializing PyCaret")
+        LOG.info(self.task_type)
+        LOG.info(self.exp)
+        self.exp.setup(
+            self.data,
+            target=self.target,
+            session_id=123,
+            html=True,
+            log_experiment=False,
+            system_log=False,
+        )
+ # def save_coefficients(self):
+ # model = self.exp.create_model('lr')
+ # coef_df = pd.DataFrame({
+ # 'Feature': self.data.columns.drop(self.target),
+ # 'Coefficient': model.coef_[0]
+ # })
+ # coef_html = coef_df.to_html(index=False)
+ # return coef_html
+    def save_tree_importance(self):
+        """Plot random-forest feature importances and save the image.
+
+        Trains an ``'rf'`` model through the experiment, draws a
+        horizontal bar chart sorted by descending importance and stores
+        the PNG path under ``self.plots['tree_importance']``.
+        """
+        model = self.exp.create_model('rf')
+        importances = model.feature_importances_
+        processed_features = self.exp.get_config('X_transformed').columns
+        LOG.debug(f"Feature importances: {importances}")
+        LOG.debug(f"Features: {processed_features}")
+        ranking = pd.DataFrame(
+            {'Feature': processed_features, 'Importance': importances})
+        ranking = ranking.sort_values(by='Importance', ascending=False)
+        plt.figure(figsize=(10, 6))
+        plt.barh(ranking['Feature'], ranking['Importance'])
+        plt.xlabel('Importance')
+        plt.title('Feature Importance (Random Forest)')
+        image_path = os.path.join(self.output_dir, 'tree_importance.png')
+        plt.savefig(image_path)
+        plt.close()
+        self.plots['tree_importance'] = image_path
+    def save_shap_values(self):
+        """Draw a SHAP summary plot for a LightGBM model and save it.
+
+        Trains a ``'lightgbm'`` model through the experiment and stores
+        the PNG path under ``self.plots['shap_summary']``.
+        """
+        lgbm_model = self.exp.create_model('lightgbm')
+        import shap
+        shap_explainer = shap.Explainer(lgbm_model)
+        shap_values = shap_explainer.shap_values(
+            self.exp.get_config('X_transformed'))
+        shap.summary_plot(
+            shap_values,
+            self.exp.get_config('X_transformed'),
+            show=False,
+        )
+        plt.title('Shap (LightGBM)')
+        image_path = os.path.join(self.output_dir, 'shap_summary.png')
+        plt.savefig(image_path)
+        plt.close()
+        self.plots['shap_summary'] = image_path
+    def generate_feature_importance(self):
+        """Create the tree-importance plot, then the SHAP summary plot."""
+        # coef_html = self.save_coefficients()
+        for make_plot in (self.save_tree_importance, self.save_shap_values):
+            make_plot()
+    def encode_image_to_base64(self, img_path):
+        """Return the bytes of the file at ``img_path`` as base64 text."""
+        with open(img_path, 'rb') as img_file:
+            raw_bytes = img_file.read()
+        encoded = base64.b64encode(raw_bytes)
+        return encoded.decode('utf-8')
+ def generate_html_report(self):
+ # Build the HTML fragment for the feature-importance section and
+ # return it as a string; one entry is produced per item in self.plots.
+ # Fix: the adjacent string literals below are concatenated without a
+ # separator, so each continuation piece now starts with a space
+ # (previously "from atrained", "forcalculating", "classificationand").
+ # NOTE(review): the markup of the f-strings appears to have been
+ # stripped from this copy; confirm against the upstream file.
+ LOG.info("Generating HTML report")
+ # Read and encode plot images
+ plots_html = ""
+ for plot_name, plot_path in self.plots.items():
+ encoded_image = self.encode_image_to_base64(plot_path)
+ plots_html += f"""
{'Feature importance analysis from a'
+ ' trained Random Forest'
+ if plot_name == 'tree_importance'
+ else 'SHAP Summary from a trained lightgbm'}
{'Use gini impurity for'
+ ' calculating feature importance for classification'
+ ' and Variance Reduction for regression'
+ if plot_name == 'tree_importance'
+ else ''}

+ """
+ # Generate HTML content with tabs
+ html_content = f"""
+ PyCaret Feature Importance Report
+ {plots_html}
+ """
+ return html_content
+    def run(self):
+        """Run setup, create the plots and return the HTML fragment."""
+        LOG.info("Running feature importance analysis")
+        self.setup_pycaret()
+        self.generate_feature_importance()
+        report_html = self.generate_html_report()
+        LOG.info("Feature importance analysis completed")
+        return report_html
+if __name__ == "__main__":
+    # Command-line entry point: run the analysis on a table read from disk.
+    import argparse
+    parser = argparse.ArgumentParser(description="Feature Importance Analysis")
+    parser.add_argument(
+        "--data_path", type=str, help="Path to the dataset")
+    parser.add_argument(
+        "--target_col", type=int,
+        help="Index of the target column (1-based)")
+    parser.add_argument(
+        "--task_type", type=str,
+        choices=["classification", "regression"],
+        help="Task type: classification or regression")
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        help="Directory to save the outputs")
+    args = parser.parse_args()
+    # Pass by keyword: the constructor's positional order is
+    # (task_type, output_dir, data_path, data, target_col), so the
+    # previous positional call bound every value to the wrong parameter.
+    analyzer = FeatureImportanceAnalyzer(
+        task_type=args.task_type,
+        output_dir=args.output_dir,
+        data_path=args.data_path,
+        target_col=args.target_col)
+    analyzer.run()
diff -r 000000000000 -r 915447b14520 pycaret_classification.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_classification.py Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,204 @@
+import logging
+from base_model_trainer import BaseModelTrainer
+from dashboard import generate_classifier_explainer_dashboard
+from pycaret.classification import ClassificationExperiment
+from utils import add_hr_to_html, add_plot_to_html
+LOG = logging.getLogger(__name__)
+class ClassificationModelTrainer(BaseModelTrainer):
+    def __init__(
+            self,
+            input_file,
+            target_col,
+            output_dir,
+            task_type,
+            random_seed,
+            test_file=None,
+            **kwargs):
+        """Initialise the base trainer and attach a classification experiment.
+
+        All arguments are forwarded unchanged to ``BaseModelTrainer``.
+        """
+        super().__init__(
+            input_file=input_file,
+            target_col=target_col,
+            output_dir=output_dir,
+            task_type=task_type,
+            random_seed=random_seed,
+            test_file=test_file,
+            **kwargs)
+        self.exp = ClassificationExperiment()
+    def save_dashboard(self):
+        """Build the classifier dashboard and save it as ``dashboard.html``."""
+        LOG.info("Saving explainer dashboard")
+        generate_classifier_explainer_dashboard(
+            self.exp, self.best_model
+        ).save_html("dashboard.html")
+    def generate_plots(self):
+        """Save every supported PyCaret classification plot.
+
+        The path returned for each plot is stored in ``self.plots``
+        under the plot name. A plot that fails is logged and skipped.
+        """
+        LOG.info("Generating and saving plots")
+        plot_names = (
+            'confusion_matrix', 'auc', 'threshold', 'pr',
+            'error', 'class_report', 'learning', 'calibration',
+            'vc', 'dimension', 'manifold', 'rfe', 'feature',
+            'feature_all',
+        )
+        # For binary problems the ROC plot shows a single binary curve.
+        binary_auc_kwargs = {
+            'micro': False,
+            'macro': False,
+            'per_class': False,
+            'binary': True
+        }
+        for plot_name in plot_names:
+            try:
+                if plot_name == 'auc' and not self.exp.is_multiclass:
+                    saved_path = self.exp.plot_model(
+                        self.best_model,
+                        plot=plot_name,
+                        save=True,
+                        plot_kwargs=binary_auc_kwargs)
+                else:
+                    saved_path = self.exp.plot_model(
+                        self.best_model, plot=plot_name, save=True)
+                self.plots[plot_name] = saved_path
+            except Exception as e:
+                LOG.error(f"Error generating plot {plot_name}: {e}")
+ def generate_plots_explainer(self):
+ # Build a ClassifierExplainer on the transformed test split and
+ # collect the HTML of its plots in self.plots_explainer_html.
+ # Every plot group has its own try/except: a failure is logged and
+ # the following groups still run.
+ # NOTE(review): indentation is flattened in this copy, so whether the
+ # statements after each `for` belong to the loop body cannot be told
+ # from here; confirm against the upstream file.
+ LOG.info("Generating and saving plots from explainer")
+ from explainerdashboard import ClassifierExplainer
+ X_test = self.exp.X_test_transformed.copy()
+ y_test = self.exp.y_test_transformed
+ explainer = ClassifierExplainer(self.best_model, X_test, y_test)
+ # The attribute name is spelled `expaliner` here and in the base class.
+ self.expaliner = explainer
+ plots_explainer_html = ""
+ # Feature importances with the explainer's default settings.
+ try:
+ fig_importance = explainer.plot_importances()
+ plots_explainer_html += add_plot_to_html(fig_importance)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot importance(mean shap): {e}")
+ # Feature importances with kind="permutation".
+ try:
+ fig_importance_perm = explainer.plot_importances(
+ kind="permutation")
+ plots_explainer_html += add_plot_to_html(fig_importance_perm)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot importance(permutation): {e}")
+ # try:
+ # fig_shap = explainer.plot_shap_summary()
+ # plots_explainer_html += add_plot_to_html(fig_shap,
+ # include_plotlyjs=False)
+ # except Exception as e:
+ # LOG.error(f"Error generating plot shap: {e}")
+ # try:
+ # fig_contributions = explainer.plot_contributions(
+ # index=0)
+ # plots_explainer_html += add_plot_to_html(
+ # fig_contributions, include_plotlyjs=False)
+ # except Exception as e:
+ # LOG.error(f"Error generating plot contributions: {e}")
+ # try:
+ # for feature in self.features_name:
+ # fig_dependence = explainer.plot_dependence(col=feature)
+ # plots_explainer_html += add_plot_to_html(fig_dependence)
+ # except Exception as e:
+ # LOG.error(f"Error generating plot dependencies: {e}")
+ # Partial-dependence plots over self.features_name.
+ try:
+ for feature in self.features_name:
+ fig_pdp = explainer.plot_pdp(feature)
+ plots_explainer_html += add_plot_to_html(fig_pdp)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot pdp: {e}")
+ # Interaction plots; each feature is passed as both col and
+ # interact_col. NOTE(review): confirm that pairing is intended.
+ try:
+ for feature in self.features_name:
+ fig_interaction = explainer.plot_interaction(
+ col=feature, interact_col=feature)
+ plots_explainer_html += add_plot_to_html(fig_interaction)
+ except Exception as e:
+ LOG.error(f"Error generating plot interactions: {e}")
+ # Interaction-importance plots over self.features_name.
+ try:
+ for feature in self.features_name:
+ fig_interactions_importance = \
+ explainer.plot_interactions_importance(
+ col=feature)
+ plots_explainer_html += add_plot_to_html(
+ fig_interactions_importance)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot interactions importance: {e}")
+ # try:
+ # for feature in self.features_name:
+ # fig_interactions_detailed = \
+ # explainer.plot_interactions_detailed(
+ # col=feature)
+ # plots_explainer_html += add_plot_to_html(
+ # fig_interactions_detailed)
+ # except Exception as e:
+ # LOG.error(f"Error generating plot interactions detailed: {e}")
+ # Classification performance plots follow, one group per plot.
+ try:
+ fig_precision = explainer.plot_precision()
+ plots_explainer_html += add_plot_to_html(fig_precision)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot precision: {e}")
+ try:
+ fig_cumulative_precision = explainer.plot_cumulative_precision()
+ plots_explainer_html += add_plot_to_html(fig_cumulative_precision)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot cumulative precision: {e}")
+ try:
+ fig_classification = explainer.plot_classification()
+ plots_explainer_html += add_plot_to_html(fig_classification)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot classification: {e}")
+ try:
+ fig_confusion_matrix = explainer.plot_confusion_matrix()
+ plots_explainer_html += add_plot_to_html(fig_confusion_matrix)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot confusion matrix: {e}")
+ try:
+ fig_lift_curve = explainer.plot_lift_curve()
+ plots_explainer_html += add_plot_to_html(fig_lift_curve)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot lift curve: {e}")
+ try:
+ fig_roc_auc = explainer.plot_roc_auc()
+ plots_explainer_html += add_plot_to_html(fig_roc_auc)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot roc auc: {e}")
+ try:
+ fig_pr_auc = explainer.plot_pr_auc()
+ plots_explainer_html += add_plot_to_html(fig_pr_auc)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot pr auc: {e}")
+ # Whatever was collected, possibly an empty string, is stored.
+ self.plots_explainer_html = plots_explainer_html
diff -r 000000000000 -r 915447b14520 pycaret_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_macros.xml Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,25 @@
+ 3.3.2
+ 0
+ 21.05
+ quay.io/goeckslab/galaxy-pycaret:3.3.2
+ @Manual{PyCaret,
+ author = {Moez Ali},
+ title = {PyCaret: An open source, low-code machine learning library in Python},
+ year = {2020},
+ month = {April},
+ note = {PyCaret version 1.0.0},
+ url = {https://www.pycaret.org}
\ No newline at end of file
diff -r 000000000000 -r 915447b14520 pycaret_predict.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_predict.py Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,200 @@
+import argparse
+import logging
+import tempfile
+import h5py
+import joblib
+import pandas as pd
+from pycaret.classification import ClassificationExperiment
+from pycaret.regression import RegressionExperiment
+from sklearn.metrics import average_precision_score
+from utils import encode_image_to_base64, get_html_closing, get_html_template
+LOG = logging.getLogger(__name__)
+class PyCaretModelEvaluator:
+    """Base class that loads a stored model for evaluation/prediction."""
+    def __init__(self, model_path, task, target):
+        """Load the model and remember the task and target column.
+
+        Args:
+            model_path: Path of the HDF5 file holding the model.
+            task: Task name; stored lower-cased.
+            target: Column number of the target; the literal string
+                ``"None"`` is treated as no target.
+        """
+        self.model_path = model_path
+        self.task = task.lower()
+        self.model = self.load_h5_model()
+        self.target = target if target != "None" else None
+    def load_h5_model(self):
+        """Load a PyCaret model from an HDF5 file.
+
+        The bytes of the ``model`` dataset are deserialized with joblib.
+        """
+        import io
+        with h5py.File(self.model_path, 'r') as f:
+            model_bytes = bytes(f['model'][()])
+        # Deserialize from memory: the previous implementation wrote the
+        # bytes to a NamedTemporaryFile(delete=False) that was never
+        # removed.
+        # NOTE: joblib.load unpickles arbitrary objects; only load model
+        # files from a trusted source.
+        return joblib.load(io.BytesIO(model_bytes))
+    def evaluate(self, data_path):
+        """Evaluate the model using the specified data."""
+        raise NotImplementedError("Subclasses must implement this method")
+class ClassificationEvaluator(PyCaretModelEvaluator):
+    """Apply a stored classification model to a dataset."""
+    def evaluate(self, data_path):
+        """Predict on ``data_path`` and, with a target, score and plot.
+
+        Without a target only predictions are produced. With a target
+        the metrics table is pulled, the classification plots are saved
+        and ``generate_html_report`` writes the evaluation report.
+
+        Returns:
+            Tuple ``(predictions, metrics, plot_paths)``; ``metrics`` is
+            ``None`` and ``plot_paths`` is empty when no target is set.
+        """
+        metrics = None
+        plot_paths = {}
+        data = pd.read_csv(data_path, engine='python', sep=None)
+        exp = ClassificationExperiment()
+        if not self.target:
+            exp.setup(data, target=None, test_data=data, index=False)
+            predictions = exp.predict_model(self.model, data=data)
+            return predictions, metrics, plot_paths
+        names = data.columns.to_list()
+        # Informational message; it was previously emitted at ERROR level.
+        LOG.info(f"Column names: {names}")
+        target_index = int(self.target) - 1
+        target_name = names[target_index]
+        exp.setup(data, target=target_name, test_data=data, index=False)
+        exp.add_metric(id='PR-AUC-Weighted',
+                       name='PR-AUC-Weighted',
+                       target='pred_proba',
+                       score_func=average_precision_score,
+                       average='weighted')
+        predictions = exp.predict_model(self.model)
+        metrics = exp.pull()
+        plots = ['confusion_matrix', 'auc', 'threshold', 'pr',
+                 'error', 'class_report', 'learning', 'calibration',
+                 'vc', 'dimension', 'manifold', 'rfe', 'feature',
+                 'feature_all']
+        # For binary problems the ROC plot shows a single binary curve.
+        binary_auc_kwargs = {
+            'micro': False,
+            'macro': False,
+            'per_class': False,
+            'binary': True
+        }
+        for plot_name in plots:
+            try:
+                if plot_name == 'auc' and not exp.is_multiclass:
+                    plot_path = exp.plot_model(
+                        self.model,
+                        plot=plot_name,
+                        save=True,
+                        plot_kwargs=binary_auc_kwargs)
+                else:
+                    plot_path = exp.plot_model(
+                        self.model, plot=plot_name, save=True)
+                plot_paths[plot_name] = plot_path
+            except Exception as e:
+                # A failed plot is logged and skipped.
+                LOG.error(f"Error generating plot {plot_name}: {e}")
+        generate_html_report(plot_paths, metrics)
+        return predictions, metrics, plot_paths
+class RegressionEvaluator(PyCaretModelEvaluator):
+    """Apply a stored regression model to a dataset."""
+    def evaluate(self, data_path):
+        """Predict on ``data_path`` and, with a target, score and plot.
+
+        Returns:
+            Tuple ``(predictions, metrics, plot_paths)``; ``metrics`` is
+            ``None`` and ``plot_paths`` is empty when no target is set.
+        """
+        metrics = None
+        plot_paths = {}
+        data = pd.read_csv(data_path, engine='python', sep=None)
+        if not self.target:
+            # Prediction only: no metrics, no plots, no report.
+            exp = RegressionExperiment()
+            exp.setup(data, target=None, test_data=data, index=False)
+            predictions = exp.predict_model(self.model, data=data)
+            return predictions, metrics, plot_paths
+        column_names = data.columns.to_list()
+        target_name = column_names[int(self.target) - 1]
+        exp = RegressionExperiment()
+        exp.setup(data, target=target_name, test_data=data, index=False)
+        predictions = exp.predict_model(self.model)
+        metrics = exp.pull()
+        plot_names = ('residuals', 'error', 'cooks',
+                      'learning', 'vc', 'manifold',
+                      'rfe', 'feature', 'feature_all')
+        for plot_name in plot_names:
+            try:
+                plot_paths[plot_name] = exp.plot_model(
+                    self.model, plot=plot_name, save=True)
+            except Exception as e:
+                # A failed plot is logged and skipped.
+                LOG.error(f"Error generating plot {plot_name}: {e}")
+        generate_html_report(plot_paths, metrics)
+        return predictions, metrics, plot_paths
+def generate_html_report(plots, metrics):
+ """Generate an HTML evaluation report."""
+ # plots: mapping of plot name to image path; each image file is
+ # base64-encoded. metrics: object providing to_html(); its HTML is
+ # built below.
+ # NOTE(review): the markup of the f-strings appears to have been
+ # stripped from this copy, so where encoded_image and metrics_html are
+ # placed cannot be seen here; confirm against the upstream file.
+ plots_html = ""
+ for plot_name, plot_path in plots.items():
+ encoded_image = encode_image_to_base64(plot_path)
+ plots_html += f"""

+ """
+ metrics_html = metrics.to_html(index=False, classes="table")
+ html_content = f"""
+ {get_html_template()}
+ Model Evaluation Report
+ {plots_html}
+ {get_html_closing()}
+ """
+ # Save HTML report
+ # The file name is relative, so it lands in the working directory.
+ with open("evaluation_report.html", "w") as html_file:
+ html_file.write(html_content)
+if __name__ == "__main__":
+    # Command-line entry point: evaluate a stored model on a CSV file
+    # and write the predictions to predictions.csv.
+    parser = argparse.ArgumentParser(
+        description="Evaluate a PyCaret model stored in HDF5 format.")
+    parser.add_argument(
+        "--model_path", type=str, help="Path to the HDF5 model file.")
+    parser.add_argument(
+        "--data_path", type=str,
+        help="Path to the evaluation data CSV file.")
+    parser.add_argument(
+        "--task", type=str, choices=["classification", "regression"],
+        help="Specify the task: classification or regression.")
+    parser.add_argument(
+        "--target", default=None, help="Column number of the target")
+    args = parser.parse_args()
+    evaluator_classes = {
+        "classification": ClassificationEvaluator,
+        "regression": RegressionEvaluator,
+    }
+    evaluator_class = evaluator_classes.get(args.task)
+    # Reached when --task is omitted (args.task is None).
+    if evaluator_class is None:
+        raise ValueError(
+            "Unsupported task type. Use 'classification' or 'regression'.")
+    evaluator = evaluator_class(args.model_path, args.task, args.target)
+    predictions, metrics, plots = evaluator.evaluate(args.data_path)
+    predictions.to_csv("predictions.csv", index=False)
diff -r 000000000000 -r 915447b14520 pycaret_regression.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_regression.py Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,134 @@
+import logging
+from base_model_trainer import BaseModelTrainer
+from dashboard import generate_regression_explainer_dashboard
+from pycaret.regression import RegressionExperiment
+from utils import add_hr_to_html, add_plot_to_html
+LOG = logging.getLogger(__name__)
+class RegressionModelTrainer(BaseModelTrainer):
+ def __init__(
+ self,
+ input_file,
+ target_col,
+ output_dir,
+ task_type,
+ random_seed,
+ test_file=None,
+ **kwargs):
+ super().__init__(
+ input_file,
+ target_col,
+ output_dir,
+ task_type,
+ random_seed,
+ test_file,
+ **kwargs)
+ self.exp = RegressionExperiment()
+ def save_dashboard(self):
+ LOG.info("Saving explainer dashboard")
+ dashboard = generate_regression_explainer_dashboard(self.exp,
+ self.best_model)
+ dashboard.save_html("dashboard.html")
+ def generate_plots(self):
+ LOG.info("Generating and saving plots")
+ plots = ['residuals', 'error', 'cooks',
+ 'learning', 'vc', 'manifold',
+ 'rfe', 'feature', 'feature_all']
+ for plot_name in plots:
+ try:
+ plot_path = self.exp.plot_model(self.best_model,
+ plot=plot_name, save=True)
+ self.plots[plot_name] = plot_path
+ except Exception as e:
+ LOG.error(f"Error generating plot {plot_name}: {e}")
+ continue
+ def generate_plots_explainer(self):
+ LOG.info("Generating and saving plots from explainer")
+ from explainerdashboard import RegressionExplainer
+ X_test = self.exp.X_test_transformed.copy()
+ y_test = self.exp.y_test_transformed
+ explainer = RegressionExplainer(self.best_model, X_test, y_test)
+ self.expaliner = explainer
+ plots_explainer_html = ""
+ try:
+ fig_importance = explainer.plot_importances()
+ plots_explainer_html += add_plot_to_html(fig_importance)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot importance: {e}")
+ try:
+ fig_importance_permutation = \
+ explainer.plot_importances_permutation(
+ kind="permutation")
+ plots_explainer_html += add_plot_to_html(
+ fig_importance_permutation)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot importance permutation: {e}")
+ try:
+ for feature in self.features_name:
+ fig_shap = explainer.plot_pdp(feature)
+ plots_explainer_html += add_plot_to_html(fig_shap)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot shap dependence: {e}")
+ # try:
+ # for feature in self.features_name:
+ # fig_interaction = explainer.plot_interaction(col=feature)
+ # plots_explainer_html += add_plot_to_html(fig_interaction)
+ # except Exception as e:
+ # LOG.error(f"Error generating plot shap interaction: {e}")
+ try:
+ for feature in self.features_name:
+ fig_interactions_importance = \
+ explainer.plot_interactions_importance(
+ col=feature)
+ plots_explainer_html += add_plot_to_html(
+ fig_interactions_importance)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot shap summary: {e}")
+ # Regression specific plots
+ try:
+ fig_pred_actual = explainer.plot_predicted_vs_actual()
+ plots_explainer_html += add_plot_to_html(fig_pred_actual)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot prediction vs actual: {e}")
+ try:
+ fig_residuals = explainer.plot_residuals()
+ plots_explainer_html += add_plot_to_html(fig_residuals)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot residuals: {e}")
+ try:
+ for feature in self.features_name:
+ fig_residuals_vs_feature = \
+ explainer.plot_residuals_vs_feature(feature)
+ plots_explainer_html += add_plot_to_html(
+ fig_residuals_vs_feature)
+ plots_explainer_html += add_hr_to_html()
+ except Exception as e:
+ LOG.error(f"Error generating plot residuals vs feature: {e}")
+ self.plots_explainer_html = plots_explainer_html
diff -r 000000000000 -r 915447b14520 pycaret_train.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_train.py Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,117 @@
+import argparse
+import logging
+from pycaret_classification import ClassificationModelTrainer
+from pycaret_regression import RegressionModelTrainer
+LOG = logging.getLogger(__name__)
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--input_file", help="Path to the input file")
+ parser.add_argument("--target_col", help="Column number of the target")
+ parser.add_argument("--output_dir",
+ help="Path to the output directory")
+ parser.add_argument("--model_type",
+ choices=["classification", "regression"],
+ help="Type of the model")
+ parser.add_argument("--train_size", type=float,
+ default=None,
+ help="Train size for PyCaret setup")
+ parser.add_argument("--normalize", action="store_true",
+ default=None,
+ help="Normalize data for PyCaret setup")
+ parser.add_argument("--feature_selection", action="store_true",
+ default=None,
+ help="Perform feature selection for PyCaret setup")
+ parser.add_argument("--cross_validation", action="store_true",
+ default=None,
+ help="Perform cross-validation for PyCaret setup")
+ parser.add_argument("--cross_validation_folds", type=int,
+ default=None,
+ help="Number of cross-validation folds \
+ for PyCaret setup")
+ parser.add_argument("--remove_outliers", action="store_true",
+ default=None,
+ help="Remove outliers for PyCaret setup")
+ parser.add_argument("--remove_multicollinearity", action="store_true",
+ default=None,
+ help="Remove multicollinearity for PyCaret setup")
+ parser.add_argument("--polynomial_features", action="store_true",
+ default=None,
+ help="Generate polynomial features for PyCaret setup")
+ parser.add_argument("--feature_interaction", action="store_true",
+ default=None,
+ help="Generate feature interactions for PyCaret setup")
+ parser.add_argument("--feature_ratio", action="store_true",
+ default=None,
+ help="Generate feature ratios for PyCaret setup")
+ parser.add_argument("--fix_imbalance", action="store_true",
+ default=None,
+ help="Fix class imbalance for PyCaret setup")
+ parser.add_argument("--models", nargs='+',
+ default=None,
+ help="Selected models for training")
+ parser.add_argument("--random_seed", type=int,
+ default=42,
+ help="Random seed for PyCaret setup")
+ parser.add_argument("--test_file", type=str, default=None,
+ help="Path to the test data file")
+ args = parser.parse_args()
+ model_kwargs = {
+ "train_size": args.train_size,
+ "normalize": args.normalize,
+ "feature_selection": args.feature_selection,
+ "cross_validation": args.cross_validation,
+ "cross_validation_folds": args.cross_validation_folds,
+ "remove_outliers": args.remove_outliers,
+ "remove_multicollinearity": args.remove_multicollinearity,
+ "polynomial_features": args.polynomial_features,
+ "feature_interaction": args.feature_interaction,
+ "feature_ratio": args.feature_ratio,
+ "fix_imbalance": args.fix_imbalance,
+ }
+ LOG.info(f"Model kwargs: {model_kwargs}")
+ # Remove None values from model_kwargs
+ LOG.info(f"Model kwargs 2: {model_kwargs}")
+ if args.models:
+ model_kwargs["models"] = args.models[0].split(",")
+ model_kwargs = {k: v for k, v in model_kwargs.items() if v is not None}
+ if args.model_type == "classification":
+ trainer = ClassificationModelTrainer(
+ args.input_file,
+ args.target_col,
+ args.output_dir,
+ args.model_type,
+ args.random_seed,
+ args.test_file,
+ **model_kwargs)
+ elif args.model_type == "regression":
+ if "fix_imbalance" in model_kwargs:
+ del model_kwargs["fix_imbalance"]
+ trainer = RegressionModelTrainer(
+ args.input_file,
+ args.target_col,
+ args.output_dir,
+ args.model_type,
+ args.random_seed,
+ args.test_file,
+ **model_kwargs)
+ else:
+ LOG.error("Invalid model type. Please choose \
+ 'classification' or 'regression'.")
+ return
+ trainer.run()
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r 915447b14520 pycaret_train.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_train.xml Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,209 @@
+ compares different machine learning models on a dataset using PyCaret. Do feature analyses using Random Forest and LightGBM.
+ pycaret_macros.xml
+ This tool uses PyCaret to train and evaluate machine learning models.
+ It compares different models on a dataset and provides the best model based on the performance metrics.
+ **Outputs**
+ - **Model**: The best model trained on the dataset in h5 format.
+ - **Comparison Result**: The comparison result of different models in html format.
+ It contains the performance metrics of different models, plots of the best model
+ on the testing set (or part of the training set if a separate test set is not uploaded), and feature analysis plots.
\ No newline at end of file
diff -r 000000000000 -r 915447b14520 test-data/auto-mpg.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/auto-mpg.tsv Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,399 @@
+MPG Cylinders Displacement Horsepower Weight Acceleration ModelYear Origin
+18.0 8 307.0 130.0 3504. 12.0 70 1
+15.0 8 350.0 165.0 3693. 11.5 70 1
+18.0 8 318.0 150.0 3436. 11.0 70 1
+16.0 8 304.0 150.0 3433. 12.0 70 1
+17.0 8 302.0 140.0 3449. 10.5 70 1
+15.0 8 429.0 198.0 4341. 10.0 70 1
+14.0 8 454.0 220.0 4354. 9.0 70 1
+14.0 8 440.0 215.0 4312. 8.5 70 1
+14.0 8 455.0 225.0 4425. 10.0 70 1
+15.0 8 390.0 190.0 3850. 8.5 70 1
+15.0 8 383.0 170.0 3563. 10.0 70 1
+14.0 8 340.0 160.0 3609. 8.0 70 1
+15.0 8 400.0 150.0 3761. 9.5 70 1
+14.0 8 455.0 225.0 3086. 10.0 70 1
+24.0 4 113.0 95.00 2372. 15.0 70 3
+22.0 6 198.0 95.00 2833. 15.5 70 1
+18.0 6 199.0 97.00 2774. 15.5 70 1
+21.0 6 200.0 85.00 2587. 16.0 70 1
+27.0 4 97.00 88.00 2130. 14.5 70 3
+26.0 4 97.00 46.00 1835. 20.5 70 2
+25.0 4 110.0 87.00 2672. 17.5 70 2
+24.0 4 107.0 90.00 2430. 14.5 70 2
+25.0 4 104.0 95.00 2375. 17.5 70 2
+26.0 4 121.0 113.0 2234. 12.5 70 2
+21.0 6 199.0 90.00 2648. 15.0 70 1
+10.0 8 360.0 215.0 4615. 14.0 70 1
+10.0 8 307.0 200.0 4376. 15.0 70 1
+11.0 8 318.0 210.0 4382. 13.5 70 1
+9.0 8 304.0 193.0 4732. 18.5 70 1
+27.0 4 97.00 88.00 2130. 14.5 71 3
+28.0 4 140.0 90.00 2264. 15.5 71 1
+25.0 4 113.0 95.00 2228. 14.0 71 3
+25.0 4 98.00 ? 2046. 19.0 71 1
+19.0 6 232.0 100.0 2634. 13.0 71 1
+16.0 6 225.0 105.0 3439. 15.5 71 1
+17.0 6 250.0 100.0 3329. 15.5 71 1
+19.0 6 250.0 88.00 3302. 15.5 71 1
+18.0 6 232.0 100.0 3288. 15.5 71 1
+14.0 8 350.0 165.0 4209. 12.0 71 1
+14.0 8 400.0 175.0 4464. 11.5 71 1
+14.0 8 351.0 153.0 4154. 13.5 71 1
+14.0 8 318.0 150.0 4096. 13.0 71 1
+12.0 8 383.0 180.0 4955. 11.5 71 1
+13.0 8 400.0 170.0 4746. 12.0 71 1
+13.0 8 400.0 175.0 5140. 12.0 71 1
+18.0 6 258.0 110.0 2962. 13.5 71 1
+22.0 4 140.0 72.00 2408. 19.0 71 1
+19.0 6 250.0 100.0 3282. 15.0 71 1
+18.0 6 250.0 88.00 3139. 14.5 71 1
+23.0 4 122.0 86.00 2220. 14.0 71 1
+28.0 4 116.0 90.00 2123. 14.0 71 2
+30.0 4 79.00 70.00 2074. 19.5 71 2
+30.0 4 88.00 76.00 2065. 14.5 71 2
+31.0 4 71.00 65.00 1773. 19.0 71 3
+35.0 4 72.00 69.00 1613. 18.0 71 3
+27.0 4 97.00 60.00 1834. 19.0 71 2
+26.0 4 91.00 70.00 1955. 20.5 71 1
+24.0 4 113.0 95.00 2278. 15.5 72 3
+25.0 4 97.50 80.00 2126. 17.0 72 1
+23.0 4 97.00 54.00 2254. 23.5 72 2
+20.0 4 140.0 90.00 2408. 19.5 72 1
+21.0 4 122.0 86.00 2226. 16.5 72 1
+13.0 8 350.0 165.0 4274. 12.0 72 1
+14.0 8 400.0 175.0 4385. 12.0 72 1
+15.0 8 318.0 150.0 4135. 13.5 72 1
+14.0 8 351.0 153.0 4129. 13.0 72 1
+17.0 8 304.0 150.0 3672. 11.5 72 1
+11.0 8 429.0 208.0 4633. 11.0 72 1
+13.0 8 350.0 155.0 4502. 13.5 72 1
+12.0 8 350.0 160.0 4456. 13.5 72 1
+13.0 8 400.0 190.0 4422. 12.5 72 1
+19.0 3 70.00 97.00 2330. 13.5 72 3
+15.0 8 304.0 150.0 3892. 12.5 72 1
+13.0 8 307.0 130.0 4098. 14.0 72 1
+13.0 8 302.0 140.0 4294. 16.0 72 1
+14.0 8 318.0 150.0 4077. 14.0 72 1
+18.0 4 121.0 112.0 2933. 14.5 72 2
+22.0 4 121.0 76.00 2511. 18.0 72 2
+21.0 4 120.0 87.00 2979. 19.5 72 2
+26.0 4 96.00 69.00 2189. 18.0 72 2
+22.0 4 122.0 86.00 2395. 16.0 72 1
+28.0 4 97.00 92.00 2288. 17.0 72 3
+23.0 4 120.0 97.00 2506. 14.5 72 3
+28.0 4 98.00 80.00 2164. 15.0 72 1
+27.0 4 97.00 88.00 2100. 16.5 72 3
+13.0 8 350.0 175.0 4100. 13.0 73 1
+14.0 8 304.0 150.0 3672. 11.5 73 1
+13.0 8 350.0 145.0 3988. 13.0 73 1
+14.0 8 302.0 137.0 4042. 14.5 73 1
+15.0 8 318.0 150.0 3777. 12.5 73 1
+12.0 8 429.0 198.0 4952. 11.5 73 1
+13.0 8 400.0 150.0 4464. 12.0 73 1
+13.0 8 351.0 158.0 4363. 13.0 73 1
+14.0 8 318.0 150.0 4237. 14.5 73 1
+13.0 8 440.0 215.0 4735. 11.0 73 1
+12.0 8 455.0 225.0 4951. 11.0 73 1
+13.0 8 360.0 175.0 3821. 11.0 73 1
+18.0 6 225.0 105.0 3121. 16.5 73 1
+16.0 6 250.0 100.0 3278. 18.0 73 1
+18.0 6 232.0 100.0 2945. 16.0 73 1
+18.0 6 250.0 88.00 3021. 16.5 73 1
+23.0 6 198.0 95.00 2904. 16.0 73 1
+26.0 4 97.00 46.00 1950. 21.0 73 2
+11.0 8 400.0 150.0 4997. 14.0 73 1
+12.0 8 400.0 167.0 4906. 12.5 73 1
+13.0 8 360.0 170.0 4654. 13.0 73 1
+12.0 8 350.0 180.0 4499. 12.5 73 1
+18.0 6 232.0 100.0 2789. 15.0 73 1
+20.0 4 97.00 88.00 2279. 19.0 73 3
+21.0 4 140.0 72.00 2401. 19.5 73 1
+22.0 4 108.0 94.00 2379. 16.5 73 3
+18.0 3 70.00 90.00 2124. 13.5 73 3
+19.0 4 122.0 85.00 2310. 18.5 73 1
+21.0 6 155.0 107.0 2472. 14.0 73 1
+26.0 4 98.00 90.00 2265. 15.5 73 2
+15.0 8 350.0 145.0 4082. 13.0 73 1
+16.0 8 400.0 230.0 4278. 9.50 73 1
+29.0 4 68.00 49.00 1867. 19.5 73 2
+24.0 4 116.0 75.00 2158. 15.5 73 2
+20.0 4 114.0 91.00 2582. 14.0 73 2
+19.0 4 121.0 112.0 2868. 15.5 73 2
+15.0 8 318.0 150.0 3399. 11.0 73 1
+24.0 4 121.0 110.0 2660. 14.0 73 2
+20.0 6 156.0 122.0 2807. 13.5 73 3
+11.0 8 350.0 180.0 3664. 11.0 73 1
+20.0 6 198.0 95.00 3102. 16.5 74 1
+21.0 6 200.0 ? 2875. 17.0 74 1
+19.0 6 232.0 100.0 2901. 16.0 74 1
+15.0 6 250.0 100.0 3336. 17.0 74 1
+31.0 4 79.00 67.00 1950. 19.0 74 3
+26.0 4 122.0 80.00 2451. 16.5 74 1
+32.0 4 71.00 65.00 1836. 21.0 74 3
+25.0 4 140.0 75.00 2542. 17.0 74 1
+16.0 6 250.0 100.0 3781. 17.0 74 1
+16.0 6 258.0 110.0 3632. 18.0 74 1
+18.0 6 225.0 105.0 3613. 16.5 74 1
+16.0 8 302.0 140.0 4141. 14.0 74 1
+13.0 8 350.0 150.0 4699. 14.5 74 1
+14.0 8 318.0 150.0 4457. 13.5 74 1
+14.0 8 302.0 140.0 4638. 16.0 74 1
+14.0 8 304.0 150.0 4257. 15.5 74 1
+29.0 4 98.00 83.00 2219. 16.5 74 2
+26.0 4 79.00 67.00 1963. 15.5 74 2
+26.0 4 97.00 78.00 2300. 14.5 74 2
+31.0 4 76.00 52.00 1649. 16.5 74 3
+32.0 4 83.00 61.00 2003. 19.0 74 3
+28.0 4 90.00 75.00 2125. 14.5 74 1
+24.0 4 90.00 75.00 2108. 15.5 74 2
+26.0 4 116.0 75.00 2246. 14.0 74 2
+24.0 4 120.0 97.00 2489. 15.0 74 3
+26.0 4 108.0 93.00 2391. 15.5 74 3
+31.0 4 79.00 67.00 2000. 16.0 74 2
+19.0 6 225.0 95.00 3264. 16.0 75 1
+18.0 6 250.0 105.0 3459. 16.0 75 1
+15.0 6 250.0 72.00 3432. 21.0 75 1
+15.0 6 250.0 72.00 3158. 19.5 75 1
+16.0 8 400.0 170.0 4668. 11.5 75 1
+15.0 8 350.0 145.0 4440. 14.0 75 1
+16.0 8 318.0 150.0 4498. 14.5 75 1
+14.0 8 351.0 148.0 4657. 13.5 75 1
+17.0 6 231.0 110.0 3907. 21.0 75 1
+16.0 6 250.0 105.0 3897. 18.5 75 1
+15.0 6 258.0 110.0 3730. 19.0 75 1
+18.0 6 225.0 95.00 3785. 19.0 75 1
+21.0 6 231.0 110.0 3039. 15.0 75 1
+20.0 8 262.0 110.0 3221. 13.5 75 1
+13.0 8 302.0 129.0 3169. 12.0 75 1
+29.0 4 97.00 75.00 2171. 16.0 75 3
+23.0 4 140.0 83.00 2639. 17.0 75 1
+20.0 6 232.0 100.0 2914. 16.0 75 1
+23.0 4 140.0 78.00 2592. 18.5 75 1
+24.0 4 134.0 96.00 2702. 13.5 75 3
+25.0 4 90.00 71.00 2223. 16.5 75 2
+24.0 4 119.0 97.00 2545. 17.0 75 3
+18.0 6 171.0 97.00 2984. 14.5 75 1
+29.0 4 90.00 70.00 1937. 14.0 75 2
+19.0 6 232.0 90.00 3211. 17.0 75 1
+23.0 4 115.0 95.00 2694. 15.0 75 2
+23.0 4 120.0 88.00 2957. 17.0 75 2
+22.0 4 121.0 98.00 2945. 14.5 75 2
+25.0 4 121.0 115.0 2671. 13.5 75 2
+33.0 4 91.00 53.00 1795. 17.5 75 3
+28.0 4 107.0 86.00 2464. 15.5 76 2
+25.0 4 116.0 81.00 2220. 16.9 76 2
+25.0 4 140.0 92.00 2572. 14.9 76 1
+26.0 4 98.00 79.00 2255. 17.7 76 1
+27.0 4 101.0 83.00 2202. 15.3 76 2
+17.5 8 305.0 140.0 4215. 13.0 76 1
+16.0 8 318.0 150.0 4190. 13.0 76 1
+15.5 8 304.0 120.0 3962. 13.9 76 1
+14.5 8 351.0 152.0 4215. 12.8 76 1
+22.0 6 225.0 100.0 3233. 15.4 76 1
+22.0 6 250.0 105.0 3353. 14.5 76 1
+24.0 6 200.0 81.00 3012. 17.6 76 1
+22.5 6 232.0 90.00 3085. 17.6 76 1
+29.0 4 85.00 52.00 2035. 22.2 76 1
+24.5 4 98.00 60.00 2164. 22.1 76 1
+29.0 4 90.00 70.00 1937. 14.2 76 2
+33.0 4 91.00 53.00 1795. 17.4 76 3
+20.0 6 225.0 100.0 3651. 17.7 76 1
+18.0 6 250.0 78.00 3574. 21.0 76 1
+18.5 6 250.0 110.0 3645. 16.2 76 1
+17.5 6 258.0 95.00 3193. 17.8 76 1
+29.5 4 97.00 71.00 1825. 12.2 76 2
+32.0 4 85.00 70.00 1990. 17.0 76 3
+28.0 4 97.00 75.00 2155. 16.4 76 3
+26.5 4 140.0 72.00 2565. 13.6 76 1
+20.0 4 130.0 102.0 3150. 15.7 76 2
+13.0 8 318.0 150.0 3940. 13.2 76 1
+19.0 4 120.0 88.00 3270. 21.9 76 2
+19.0 6 156.0 108.0 2930. 15.5 76 3
+16.5 6 168.0 120.0 3820. 16.7 76 2
+16.5 8 350.0 180.0 4380. 12.1 76 1
+13.0 8 350.0 145.0 4055. 12.0 76 1
+13.0 8 302.0 130.0 3870. 15.0 76 1
+13.0 8 318.0 150.0 3755. 14.0 76 1
+31.5 4 98.00 68.00 2045. 18.5 77 3
+30.0 4 111.0 80.00 2155. 14.8 77 1
+36.0 4 79.00 58.00 1825. 18.6 77 2
+25.5 4 122.0 96.00 2300. 15.5 77 1
+33.5 4 85.00 70.00 1945. 16.8 77 3
+17.5 8 305.0 145.0 3880. 12.5 77 1
+17.0 8 260.0 110.0 4060. 19.0 77 1
+15.5 8 318.0 145.0 4140. 13.7 77 1
+15.0 8 302.0 130.0 4295. 14.9 77 1
+17.5 6 250.0 110.0 3520. 16.4 77 1
+20.5 6 231.0 105.0 3425. 16.9 77 1
+19.0 6 225.0 100.0 3630. 17.7 77 1
+18.5 6 250.0 98.00 3525. 19.0 77 1
+16.0 8 400.0 180.0 4220. 11.1 77 1
+15.5 8 350.0 170.0 4165. 11.4 77 1
+15.5 8 400.0 190.0 4325. 12.2 77 1
+16.0 8 351.0 149.0 4335. 14.5 77 1
+29.0 4 97.00 78.00 1940. 14.5 77 2
+24.5 4 151.0 88.00 2740. 16.0 77 1
+26.0 4 97.00 75.00 2265. 18.2 77 3
+25.5 4 140.0 89.00 2755. 15.8 77 1
+30.5 4 98.00 63.00 2051. 17.0 77 1
+33.5 4 98.00 83.00 2075. 15.9 77 1
+30.0 4 97.00 67.00 1985. 16.4 77 3
+30.5 4 97.00 78.00 2190. 14.1 77 2
+22.0 6 146.0 97.00 2815. 14.5 77 3
+21.5 4 121.0 110.0 2600. 12.8 77 2
+21.5 3 80.00 110.0 2720. 13.5 77 3
+43.1 4 90.00 48.00 1985. 21.5 78 2
+36.1 4 98.00 66.00 1800. 14.4 78 1
+32.8 4 78.00 52.00 1985. 19.4 78 3
+39.4 4 85.00 70.00 2070. 18.6 78 3
+36.1 4 91.00 60.00 1800. 16.4 78 3
+19.9 8 260.0 110.0 3365. 15.5 78 1
+19.4 8 318.0 140.0 3735. 13.2 78 1
+20.2 8 302.0 139.0 3570. 12.8 78 1
+19.2 6 231.0 105.0 3535. 19.2 78 1
+20.5 6 200.0 95.00 3155. 18.2 78 1
+20.2 6 200.0 85.00 2965. 15.8 78 1
+25.1 4 140.0 88.00 2720. 15.4 78 1
+20.5 6 225.0 100.0 3430. 17.2 78 1
+19.4 6 232.0 90.00 3210. 17.2 78 1
+20.6 6 231.0 105.0 3380. 15.8 78 1
+20.8 6 200.0 85.00 3070. 16.7 78 1
+18.6 6 225.0 110.0 3620. 18.7 78 1
+18.1 6 258.0 120.0 3410. 15.1 78 1
+19.2 8 305.0 145.0 3425. 13.2 78 1
+17.7 6 231.0 165.0 3445. 13.4 78 1
+18.1 8 302.0 139.0 3205. 11.2 78 1
+17.5 8 318.0 140.0 4080. 13.7 78 1
+30.0 4 98.00 68.00 2155. 16.5 78 1
+27.5 4 134.0 95.00 2560. 14.2 78 3
+27.2 4 119.0 97.00 2300. 14.7 78 3
+30.9 4 105.0 75.00 2230. 14.5 78 1
+21.1 4 134.0 95.00 2515. 14.8 78 3
+23.2 4 156.0 105.0 2745. 16.7 78 1
+23.8 4 151.0 85.00 2855. 17.6 78 1
+23.9 4 119.0 97.00 2405. 14.9 78 3
+20.3 5 131.0 103.0 2830. 15.9 78 2
+17.0 6 163.0 125.0 3140. 13.6 78 2
+21.6 4 121.0 115.0 2795. 15.7 78 2
+16.2 6 163.0 133.0 3410. 15.8 78 2
+31.5 4 89.00 71.00 1990. 14.9 78 2
+29.5 4 98.00 68.00 2135. 16.6 78 3
+21.5 6 231.0 115.0 3245. 15.4 79 1
+19.8 6 200.0 85.00 2990. 18.2 79 1
+22.3 4 140.0 88.00 2890. 17.3 79 1
+20.2 6 232.0 90.00 3265. 18.2 79 1
+20.6 6 225.0 110.0 3360. 16.6 79 1
+17.0 8 305.0 130.0 3840. 15.4 79 1
+17.6 8 302.0 129.0 3725. 13.4 79 1
+16.5 8 351.0 138.0 3955. 13.2 79 1
+18.2 8 318.0 135.0 3830. 15.2 79 1
+16.9 8 350.0 155.0 4360. 14.9 79 1
+15.5 8 351.0 142.0 4054. 14.3 79 1
+19.2 8 267.0 125.0 3605. 15.0 79 1
+18.5 8 360.0 150.0 3940. 13.0 79 1
+31.9 4 89.00 71.00 1925. 14.0 79 2
+34.1 4 86.00 65.00 1975. 15.2 79 3
+35.7 4 98.00 80.00 1915. 14.4 79 1
+27.4 4 121.0 80.00 2670. 15.0 79 1
+25.4 5 183.0 77.00 3530. 20.1 79 2
+23.0 8 350.0 125.0 3900. 17.4 79 1
+27.2 4 141.0 71.00 3190. 24.8 79 2
+23.9 8 260.0 90.00 3420. 22.2 79 1
+34.2 4 105.0 70.00 2200. 13.2 79 1
+34.5 4 105.0 70.00 2150. 14.9 79 1
+31.8 4 85.00 65.00 2020. 19.2 79 3
+37.3 4 91.00 69.00 2130. 14.7 79 2
+28.4 4 151.0 90.00 2670. 16.0 79 1
+28.8 6 173.0 115.0 2595. 11.3 79 1
+26.8 6 173.0 115.0 2700. 12.9 79 1
+33.5 4 151.0 90.00 2556. 13.2 79 1
+41.5 4 98.00 76.00 2144. 14.7 80 2
+38.1 4 89.00 60.00 1968. 18.8 80 3
+32.1 4 98.00 70.00 2120. 15.5 80 1
+37.2 4 86.00 65.00 2019. 16.4 80 3
+28.0 4 151.0 90.00 2678. 16.5 80 1
+26.4 4 140.0 88.00 2870. 18.1 80 1
+24.3 4 151.0 90.00 3003. 20.1 80 1
+19.1 6 225.0 90.00 3381. 18.7 80 1
+34.3 4 97.00 78.00 2188. 15.8 80 2
+29.8 4 134.0 90.00 2711. 15.5 80 3
+31.3 4 120.0 75.00 2542. 17.5 80 3
+37.0 4 119.0 92.00 2434. 15.0 80 3
+32.2 4 108.0 75.00 2265. 15.2 80 3
+46.6 4 86.00 65.00 2110. 17.9 80 3
+27.9 4 156.0 105.0 2800. 14.4 80 1
+40.8 4 85.00 65.00 2110. 19.2 80 3
+44.3 4 90.00 48.00 2085. 21.7 80 2
+43.4 4 90.00 48.00 2335. 23.7 80 2
+36.4 5 121.0 67.00 2950. 19.9 80 2
+30.0 4 146.0 67.00 3250. 21.8 80 2
+44.6 4 91.00 67.00 1850. 13.8 80 3
+40.9 4 85.00 ? 1835. 17.3 80 2
+33.8 4 97.00 67.00 2145. 18.0 80 3
+29.8 4 89.00 62.00 1845. 15.3 80 2
+32.7 6 168.0 132.0 2910. 11.4 80 3
+23.7 3 70.00 100.0 2420. 12.5 80 3
+35.0 4 122.0 88.00 2500. 15.1 80 2
+23.6 4 140.0 ? 2905. 14.3 80 1
+32.4 4 107.0 72.00 2290. 17.0 80 3
+27.2 4 135.0 84.00 2490. 15.7 81 1
+26.6 4 151.0 84.00 2635. 16.4 81 1
+25.8 4 156.0 92.00 2620. 14.4 81 1
+23.5 6 173.0 110.0 2725. 12.6 81 1
+30.0 4 135.0 84.00 2385. 12.9 81 1
+39.1 4 79.00 58.00 1755. 16.9 81 3
+39.0 4 86.00 64.00 1875. 16.4 81 1
+35.1 4 81.00 60.00 1760. 16.1 81 3
+32.3 4 97.00 67.00 2065. 17.8 81 3
+37.0 4 85.00 65.00 1975. 19.4 81 3
+37.7 4 89.00 62.00 2050. 17.3 81 3
+34.1 4 91.00 68.00 1985. 16.0 81 3
+34.7 4 105.0 63.00 2215. 14.9 81 1
+34.4 4 98.00 65.00 2045. 16.2 81 1
+29.9 4 98.00 65.00 2380. 20.7 81 1
+33.0 4 105.0 74.00 2190. 14.2 81 2
+34.5 4 100.0 ? 2320. 15.8 81 2
+33.7 4 107.0 75.00 2210. 14.4 81 3
+32.4 4 108.0 75.00 2350. 16.8 81 3
+32.9 4 119.0 100.0 2615. 14.8 81 3
+31.6 4 120.0 74.00 2635. 18.3 81 3
+28.1 4 141.0 80.00 3230. 20.4 81 2
+30.7 6 145.0 76.00 3160. 19.6 81 2
+25.4 6 168.0 116.0 2900. 12.6 81 3
+24.2 6 146.0 120.0 2930. 13.8 81 3
+22.4 6 231.0 110.0 3415. 15.8 81 1
+26.6 8 350.0 105.0 3725. 19.0 81 1
+20.2 6 200.0 88.00 3060. 17.1 81 1
+17.6 6 225.0 85.00 3465. 16.6 81 1
+28.0 4 112.0 88.00 2605. 19.6 82 1
+27.0 4 112.0 88.00 2640. 18.6 82 1
+34.0 4 112.0 88.00 2395. 18.0 82 1
+31.0 4 112.0 85.00 2575. 16.2 82 1
+29.0 4 135.0 84.00 2525. 16.0 82 1
+27.0 4 151.0 90.00 2735. 18.0 82 1
+24.0 4 140.0 92.00 2865. 16.4 82 1
+23.0 4 151.0 ? 3035. 20.5 82 1
+36.0 4 105.0 74.00 1980. 15.3 82 2
+37.0 4 91.00 68.00 2025. 18.2 82 3
+31.0 4 91.00 68.00 1970. 17.6 82 3
+38.0 4 105.0 63.00 2125. 14.7 82 1
+36.0 4 98.00 70.00 2125. 17.3 82 1
+36.0 4 120.0 88.00 2160. 14.5 82 3
+36.0 4 107.0 75.00 2205. 14.5 82 3
+34.0 4 108.0 70.00 2245 16.9 82 3
+38.0 4 91.00 67.00 1965. 15.0 82 3
+32.0 4 91.00 67.00 1965. 15.7 82 3
+38.0 4 91.00 67.00 1995. 16.2 82 3
+25.0 6 181.0 110.0 2945. 16.4 82 1
+38.0 6 262.0 85.00 3015. 17.0 82 1
+26.0 4 156.0 92.00 2585. 14.5 82 1
+22.0 6 232.0 112.0 2835 14.7 82 1
+32.0 4 144.0 96.00 2665. 13.9 82 3
+36.0 4 135.0 84.00 2370. 13.0 82 1
+27.0 4 151.0 90.00 2950. 17.3 82 1
+27.0 4 140.0 86.00 2790. 15.6 82 1
+44.0 4 97.00 52.00 2130. 24.6 82 2
+32.0 4 135.0 84.00 2295. 11.6 82 1
+28.0 4 120.0 79.00 2625. 18.6 82 1
+31.0 4 119.0 82.00 2720. 19.4 82 1
diff -r 000000000000 -r 915447b14520 test-data/evaluation_report_classification.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/evaluation_report_classification.html Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,236 @@
+ Model Training Report
Model Evaluation Report
+ Model |
+ Accuracy |
+ AUC |
+ Recall |
+ Prec. |
+ F1 |
+ Kappa |
+ MCC |
+ PR-AUC-Weighted |
+ Light Gradient Boosting Machine |
+ 0.7826 |
+ 0.8162 |
+ 0.7419 |
+ 0.7667 |
+ 0.7541 |
+ 0.5594 |
+ 0.5596 |
+ 0.7753 |














\ No newline at end of file
diff -r 000000000000 -r 915447b14520 test-data/evaluation_report_regression.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/evaluation_report_regression.html Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,202 @@
+ Model Training Report
Model Evaluation Report
+ Model |
+ MAE |
+ MSE |
+ RMSE |
+ R2 |
+ MAPE |
+ Gradient Boosting Regressor |
+ 1.6 |
+ 5.6214 |
+ 2.3709 |
+ 0.9077 |
+ 0.0875 |
+ 0.0691 |









\ No newline at end of file
diff -r 000000000000 -r 915447b14520 test-data/expected_best_model_classification.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_best_model_classification.csv Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,20 @@
diff -r 000000000000 -r 915447b14520 test-data/expected_best_model_classification_customized.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_best_model_classification_customized.csv Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,20 @@
diff -r 000000000000 -r 915447b14520 test-data/expected_best_model_regression.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_best_model_regression.csv Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,22 @@
diff -r 000000000000 -r 915447b14520 test-data/expected_comparison_result_classification.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_comparison_result_classification.html Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,606 @@
+ Model Training Report
PyCaret Model Training Report
+ Setup & Best Model
+ Best Model Plots
+ Feature Importance
+ Explainer
Setup Parameters
+ Parameter | Value |
+ target |
+ PCR |
+ session_id |
+ 42 |
+ index |
+ False |
If you want to know all the experiment setup parameters,
+ please check the PyCaret documentation for
+ the classification/regression exp
Best Model: LGBMClassifier
+ Parameter | Value |
+ boosting_type |
+ gbdt |
+ class_weight |
+ None |
+ colsample_bytree |
+ 1.0 |
+ importance_type |
+ split |
+ learning_rate |
+ 0.1 |
+ max_depth |
+ -1 |
+ min_child_samples |
+ 20 |
+ min_child_weight |
+ 0.001 |
+ min_split_gain |
+ 0.0 |
+ n_estimators |
+ 100 |
+ n_jobs |
+ -1 |
+ num_leaves |
+ 31 |
+ objective |
+ None |
+ random_state |
+ 42 |
+ reg_alpha |
+ 0.0 |
+ reg_lambda |
+ 0.0 |
+ subsample |
+ 1.0 |
+ subsample_for_bin |
+ 200000 |
+ subsample_freq |
+ 0 |
Comparison Results on the Cross-Validation Set
+ Model |
+ Accuracy |
+ Recall |
+ Prec. |
+ F1 |
+ Kappa |
+ MCC |
+ PR-AUC-Weighted |
+ TT (Sec) |
+ Light Gradient Boosting Machine |
+ 0.715 |
+ 0.6000 |
+ 0.6500 |
+ 0.6917 |
+ 0.6357 |
+ 0.4380 |
+ 0.4748 |
+ 0.6822 |
+ 0.228 |
+ Logistic Regression |
+ 0.670 |
+ 0.6500 |
+ 0.7500 |
+ 0.6167 |
+ 0.6633 |
+ 0.3478 |
+ 0.3742 |
+ 0.7144 |
+ 0.331 |
+ Ridge Classifier |
+ 0.670 |
+ 0.6167 |
+ 0.7500 |
+ 0.6167 |
+ 0.6633 |
+ 0.3478 |
+ 0.3742 |
+ 0.0000 |
+ 0.180 |
+ Naive Bayes |
+ 0.650 |
+ 0.6333 |
+ 0.6833 |
+ 0.5917 |
+ 0.6257 |
+ 0.2969 |
+ 0.3112 |
+ 0.6978 |
+ 2.694 |
+ Quadratic Discriminant Analysis |
+ 0.610 |
+ 0.6333 |
+ 0.4667 |
+ 0.5333 |
+ 0.4733 |
+ 0.2256 |
+ 0.2488 |
+ 0.7033 |
+ 0.158 |
+ Linear Discriminant Analysis |
+ 0.605 |
+ 0.6000 |
+ 0.7000 |
+ 0.5900 |
+ 0.6105 |
+ 0.2372 |
+ 0.2577 |
+ 0.6594 |
+ 0.110 |
+ CatBoost Classifier |
+ 0.595 |
+ 0.6167 |
+ 0.6167 |
+ 0.5500 |
+ 0.5600 |
+ 0.2165 |
+ 0.2207 |
+ 0.6861 |
+ 12.075 |
+ Extra Trees Classifier |
+ 0.590 |
+ 0.6000 |
+ 0.5833 |
+ 0.5000 |
+ 0.5300 |
+ 0.2103 |
+ 0.2167 |
+ 0.6811 |
+ 0.775 |
+ SVM - Linear Kernel |
+ 0.585 |
+ 0.6500 |
+ 0.5333 |
+ 0.4667 |
+ 0.4521 |
+ 0.1429 |
+ 0.1690 |
+ 0.0000 |
+ 0.217 |
+ K Neighbors Classifier |
+ 0.565 |
+ 0.6292 |
+ 0.5000 |
+ 0.5750 |
+ 0.5057 |
+ 0.1413 |
+ 0.1469 |
+ 0.6717 |
+ 0.685 |
+ Random Forest Classifier |
+ 0.555 |
+ 0.5667 |
+ 0.5833 |
+ 0.5167 |
+ 0.5233 |
+ 0.1524 |
+ 0.1540 |
+ 0.6211 |
+ 0.847 |
+ Dummy Classifier |
+ 0.540 |
+ 0.5000 |
+ 0.0000 |
+ 0.0000 |
+ 0.0000 |
+ 0.0000 |
+ 0.0000 |
+ 0.4600 |
+ 0.165 |
+ Ada Boost Classifier |
+ 0.510 |
+ 0.4417 |
+ 0.5667 |
+ 0.4650 |
+ 0.4971 |
+ 0.0656 |
+ 0.0275 |
+ 0.5819 |
+ 0.645 |
+ Decision Tree Classifier |
+ 0.495 |
+ 0.5000 |
+ 0.4333 |
+ 0.4333 |
+ 0.4133 |
+ 0.0049 |
+ 0.0040 |
+ 0.5483 |
+ 0.329 |
+ Gradient Boosting Classifier |
+ 0.475 |
+ 0.4333 |
+ 0.4500 |
+ 0.4000 |
+ 0.4033 |
+ -0.0033 |
+ -0.0239 |
+ 0.5800 |
+ 0.643 |
+ Extreme Gradient Boosting |
+ 0.460 |
+ 0.4833 |
+ 0.4333 |
+ 0.3333 |
+ 0.3667 |
+ -0.0489 |
+ -0.0537 |
+ 0.6281 |
+ 0.422 |
Results on the Test Set for the best model
+ Model |
+ Accuracy |
+ Recall |
+ Prec. |
+ F1 |
+ Kappa |
+ MCC |
+ PR-AUC-Weighted |
+ Light Gradient Boosting Machine |
+ 0.8095 |
+ 0.7454 |
+ 0.6667 |
+ 0.8571 |
+ 0.75 |
+ 0.6 |
+ 0.6124 |
+ 0.6799 |
Best Model Plots on the testing set














PyCaret Feature Importance Report
Feature importance analysis from atrained Random Forest
Use gini impurity forcalculating feature importance for classificationand Variance Reduction for regression

SHAP Summary from a trained lightgbm

\ No newline at end of file
diff -r 000000000000 -r 915447b14520 test-data/expected_comparison_result_classification_customized.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_comparison_result_classification_customized.html Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,620 @@
+ Model Training Report
PyCaret Model Training Report
+ Setup & Best Model
+ Best Model Plots
+ Feature Importance
+ Explainer
Setup Parameters
+ Parameter | Value |
+ target |
+ PCR |
+ session_id |
+ 42 |
+ index |
+ False |
+ train_size |
+ 0.8 |
+ normalize |
+ True |
+ feature_selection |
+ True |
+ fold |
+ 5 |
+ remove_outliers |
+ True |
+ remove_multicollinearity |
+ True |
If you want to know all the experiment setup parameters,
+ please check the PyCaret documentation for
+ the classification/regression exp
Best Model: LGBMClassifier
+ Parameter | Value |
+ boosting_type |
+ gbdt |
+ class_weight |
+ None |
+ colsample_bytree |
+ 1.0 |
+ importance_type |
+ split |
+ learning_rate |
+ 0.1 |
+ max_depth |
+ -1 |
+ min_child_samples |
+ 20 |
+ min_child_weight |
+ 0.001 |
+ min_split_gain |
+ 0.0 |
+ n_estimators |
+ 100 |
+ n_jobs |
+ -1 |
+ num_leaves |
+ 31 |
+ objective |
+ None |
+ random_state |
+ 42 |
+ reg_alpha |
+ 0.0 |
+ reg_lambda |
+ 0.0 |
+ subsample |
+ 1.0 |
+ subsample_for_bin |
+ 200000 |
+ subsample_freq |
+ 0 |
Comparison Results on the Cross-Validation Set
+ Model |
+ Accuracy |
+ Recall |
+ Prec. |
+ F1 |
+ Kappa |
+ MCC |
+ PR-AUC-Weighted |
+ TT (Sec) |
+ Light Gradient Boosting Machine |
+ 0.7091 |
+ 0.6267 |
+ 0.64 |
+ 0.6895 |
+ 0.6467 |
+ 0.4056 |
+ 0.4224 |
+ 0.5918 |
+ 0.322 |
+ Naive Bayes |
+ 0.6545 |
+ 0.6800 |
+ 0.72 |
+ 0.6117 |
+ 0.6498 |
+ 0.3163 |
+ 0.3232 |
+ 0.6930 |
+ 1.240 |
+ K Neighbors Classifier |
+ 0.6364 |
+ 0.6467 |
+ 0.56 |
+ 0.6067 |
+ 0.5743 |
+ 0.2603 |
+ 0.2660 |
+ 0.6001 |
+ 0.864 |
+ Ridge Classifier |
+ 0.6364 |
+ 0.6467 |
+ 0.64 |
+ 0.5962 |
+ 0.6048 |
+ 0.2700 |
+ 0.2835 |
+ 0.0000 |
+ 0.898 |
+ Random Forest Classifier |
+ 0.6364 |
+ 0.6300 |
+ 0.60 |
+ 0.6343 |
+ 0.6013 |
+ 0.2688 |
+ 0.2834 |
+ 0.6539 |
+ 0.906 |
+ Logistic Regression |
+ 0.6364 |
+ 0.6400 |
+ 0.64 |
+ 0.5962 |
+ 0.6048 |
+ 0.2700 |
+ 0.2835 |
+ 0.6697 |
+ 0.798 |
+ Quadratic Discriminant Analysis |
+ 0.6364 |
+ 0.6933 |
+ 0.72 |
+ 0.5851 |
+ 0.6353 |
+ 0.2815 |
+ 0.2899 |
+ 0.7075 |
+ 0.418 |
+ Linear Discriminant Analysis |
+ 0.6364 |
+ 0.6467 |
+ 0.64 |
+ 0.5962 |
+ 0.6048 |
+ 0.2700 |
+ 0.2835 |
+ 0.6751 |
+ 0.364 |
+ Gradient Boosting Classifier |
+ 0.6182 |
+ 0.6333 |
+ 0.60 |
+ 0.5843 |
+ 0.5846 |
+ 0.2328 |
+ 0.2389 |
+ 0.6403 |
+ 0.522 |
+ Ada Boost Classifier |
+ 0.6182 |
+ 0.6567 |
+ 0.60 |
+ 0.5943 |
+ 0.5891 |
+ 0.2340 |
+ 0.2415 |
+ 0.6517 |
+ 0.560 |
+ Extra Trees Classifier |
+ 0.6182 |
+ 0.5800 |
+ 0.56 |
+ 0.5876 |
+ 0.5622 |
+ 0.2266 |
+ 0.2347 |
+ 0.6413 |
+ 0.468 |
+ Decision Tree Classifier |
+ 0.6000 |
+ 0.5967 |
+ 0.56 |
+ 0.5867 |
+ 0.5533 |
+ 0.1950 |
+ 0.2060 |
+ 0.5215 |
+ 1.532 |
+ CatBoost Classifier |
+ 0.5818 |
+ 0.6667 |
+ 0.48 |
+ 0.5133 |
+ 0.4845 |
+ 0.1454 |
+ 0.1414 |
+ 0.6991 |
+ 3.426 |
+ SVM - Linear Kernel |
+ 0.5455 |
+ 0.5000 |
+ 0.40 |
+ 0.5033 |
+ 0.4332 |
+ 0.0684 |
+ 0.0685 |
+ 0.0000 |
+ 1.666 |
+ Dummy Classifier |
+ 0.5455 |
+ 0.5000 |
+ 0.00 |
+ 0.0000 |
+ 0.0000 |
+ 0.0000 |
+ 0.0000 |
+ 0.4545 |
+ 0.456 |
+ Extreme Gradient Boosting |
+ 0.5273 |
+ 0.5600 |
+ 0.52 |
+ 0.4967 |
+ 0.5042 |
+ 0.0550 |
+ 0.0564 |
+ 0.5943 |
+ 0.336 |
Results on the Test Set for the best model
+ Model |
+ Accuracy |
+ Recall |
+ Prec. |
+ F1 |
+ Kappa |
+ MCC |
+ PR-AUC-Weighted |
+ Light Gradient Boosting Machine |
+ 0.7857 |
+ 0.7604 |
+ 0.6667 |
+ 0.8 |
+ 0.7273 |
+ 0.5532 |
+ 0.5594 |
+ 0.7502 |
Best Model Plots on the testing set














PyCaret Feature Importance Report
Feature importance analysis from atrained Random Forest
Use gini impurity forcalculating feature importance for classificationand Variance Reduction for regression

SHAP Summary from a trained lightgbm

\ No newline at end of file
diff -r 000000000000 -r 915447b14520 test-data/expected_comparison_result_regression.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_comparison_result_regression.html Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,591 @@
+ Model Training Report
PyCaret Model Training Report
+ Setup & Best Model
+ Best Model Plots
+ Feature Importance
+ Explainer
Setup Parameters
+ Parameter | Value |
+ target |
+ MPG |
+ session_id |
+ 42 |
+ index |
+ False |
If you want to know all the experiment setup parameters,
+ please check the PyCaret documentation for
+ the classification/regression exp
Best Model: GradientBoostingRegressor
+ Parameter | Value |
+ alpha |
+ 0.9 |
+ ccp_alpha |
+ 0.0 |
+ criterion |
+ friedman_mse |
+ init |
+ None |
+ learning_rate |
+ 0.1 |
+ loss |
+ squared_error |
+ max_depth |
+ 3 |
+ max_features |
+ None |
+ max_leaf_nodes |
+ None |
+ min_impurity_decrease |
+ 0.0 |
+ min_samples_leaf |
+ 1 |
+ min_samples_split |
+ 2 |
+ min_weight_fraction_leaf |
+ 0.0 |
+ n_estimators |
+ 100 |
+ n_iter_no_change |
+ None |
+ random_state |
+ 42 |
+ subsample |
+ 1.0 |
+ tol |
+ 0.0001 |
+ validation_fraction |
+ 0.1 |
+ verbose |
+ 0 |
+ warm_start |
+ False |
Comparison Results on the Cross-Validation Set
+ Model |
+ MAE |
+ MSE |
+ RMSE |
+ R2 |
+ RMSLE |
+ MAPE |
+ TT (Sec) |
+ Gradient Boosting Regressor |
+ 2.2775 |
+ 9.8743 |
+ 3.0921 |
+ 0.8383 |
+ 0.1197 |
+ 0.0980 |
+ 0.681 |
+ Extra Trees Regressor |
+ 2.2119 |
+ 10.2477 |
+ 3.1304 |
+ 0.8323 |
+ 0.1220 |
+ 0.0949 |
+ 2.212 |
+ Light Gradient Boosting Machine |
+ 2.3218 |
+ 10.4931 |
+ 3.1818 |
+ 0.8282 |
+ 0.1252 |
+ 0.1011 |
+ 0.263 |
+ CatBoost Regressor |
+ 2.3204 |
+ 10.5063 |
+ 3.1906 |
+ 0.8270 |
+ 0.1256 |
+ 0.1011 |
+ 8.883 |
+ Random Forest Regressor |
+ 2.3161 |
+ 11.0170 |
+ 3.2515 |
+ 0.8210 |
+ 0.1252 |
+ 0.0990 |
+ 1.916 |
+ Extreme Gradient Boosting |
+ 2.4277 |
+ 11.9887 |
+ 3.3949 |
+ 0.8045 |
+ 0.1336 |
+ 0.1057 |
+ 0.497 |
+ Elastic Net |
+ 2.6119 |
+ 12.1337 |
+ 3.4462 |
+ 0.8029 |
+ 0.1426 |
+ 0.1168 |
+ 0.116 |
+ Lasso Regression |
+ 2.6238 |
+ 12.2869 |
+ 3.4649 |
+ 0.8011 |
+ 0.1438 |
+ 0.1172 |
+ 0.134 |
+ Lasso Least Angle Regression |
+ 2.6238 |
+ 12.2868 |
+ 3.4649 |
+ 0.8011 |
+ 0.1438 |
+ 0.1172 |
+ 0.157 |
+ AdaBoost Regressor |
+ 2.5949 |
+ 12.5846 |
+ 3.4968 |
+ 0.7939 |
+ 0.1378 |
+ 0.1153 |
+ 2.469 |
+ Bayesian Ridge |
+ 2.6494 |
+ 12.5149 |
+ 3.5121 |
+ 0.7920 |
+ 0.1433 |
+ 0.1194 |
+ 0.268 |
+ Ridge Regression |
+ 2.6852 |
+ 12.7684 |
+ 3.5480 |
+ 0.7872 |
+ 0.1448 |
+ 0.1212 |
+ 0.108 |
+ Linear Regression |
+ 2.6893 |
+ 12.7997 |
+ 3.5523 |
+ 0.7866 |
+ 0.1450 |
+ 0.1214 |
+ 0.122 |
+ Least Angle Regression |
+ 2.7583 |
+ 13.3766 |
+ 3.6327 |
+ 0.7759 |
+ 0.1489 |
+ 0.1249 |
+ 0.165 |
+ Huber Regressor |
+ 2.6780 |
+ 14.2077 |
+ 3.7197 |
+ 0.7699 |
+ 0.1404 |
+ 0.1138 |
+ 1.508 |
+ Decision Tree Regressor |
+ 2.6552 |
+ 15.5784 |
+ 3.8636 |
+ 0.7507 |
+ 0.1470 |
+ 0.1108 |
+ 0.253 |
+ Orthogonal Matching Pursuit |
+ 3.3731 |
+ 20.2491 |
+ 4.4464 |
+ 0.6709 |
+ 0.1767 |
+ 0.1475 |
+ 0.418 |
+ K Neighbors Regressor |
+ 3.4315 |
+ 21.1052 |
+ 4.5405 |
+ 0.6546 |
+ 0.1692 |
+ 0.1448 |
+ 0.858 |
+ Dummy Regressor |
+ 6.6547 |
+ 62.8366 |
+ 7.8973 |
+ -0.0391 |
+ 0.3303 |
+ 0.3219 |
+ 0.129 |
+ Passive Aggressive Regressor |
+ 7.5227 |
+ 84.7568 |
+ 9.0993 |
+ -0.4762 |
+ 0.4067 |
+ 0.3652 |
+ 0.420 |
Results on the Test Set for the best model
+ Model |
+ MAE |
+ MSE |
+ RMSE |
+ R2 |
+ RMSLE |
+ MAPE |
+ Gradient Boosting Regressor |
+ 2.2015 |
+ 9.911 |
+ 3.1482 |
+ 0.8273 |
+ 0.1198 |
+ 0.094 |
Best Model Plots on the testing set









PyCaret Feature Importance Report
Feature importance analysis from a trained Random Forest
Use gini impurity for calculating feature importance for classification and Variance Reduction for regression

SHAP Summary from a trained lightgbm

\ No newline at end of file
diff -r 000000000000 -r 915447b14520 test-data/expected_model_classification.h5
Binary file test-data/expected_model_classification.h5 has changed
diff -r 000000000000 -r 915447b14520 test-data/expected_model_classification_customized.h5
Binary file test-data/expected_model_classification_customized.h5 has changed
diff -r 000000000000 -r 915447b14520 test-data/expected_model_regression.h5
Binary file test-data/expected_model_regression.h5 has changed
diff -r 000000000000 -r 915447b14520 test-data/pcr.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pcr.tsv Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,70 @@
+0.1446255786628313 0.1699650673189346 0.9375600251501058 0.6976422301857574 0.0957045465168299 0.1228059681653541 0.1203946505834535 0.6617311325227386 0.5265003537481928 0.184448963872354 0
+0.3704854668147701 0.3635509393089497 0.2775010191320449 0.3686096626765244 0.2591694877907093 0.4492672714584788 0.4011669070407762 0.5399668857265811 0.138846165677197 0.0619176638311385 0
+0.0418563439424193 0.0619893695619427 0.1075443408806682 0.5687699992462811 0.533482902986719 0.0156581264745954 0.0292513439097941 0.237518325905719 0.5496170291303947 0.0824645446376087 0
+0.1909378671820324 0.0613509694356973 0.3629699235132763 0.3137868894020268 0.5779331343522767 0.2338351111554182 0.2060115379572571 0.3704859920788323 0.6821187978713588 0.1031398012202455 0
+0.3100284435655261 0.1002646849961624 0.4381507762676965 0.1638859006598466 0.6850690758064408 0.3223872661416277 0.2810213714435557 0.7561870445875668 0.6041327017133716 0.1048289561698074 0
+0.6841865332879608 0.409759631592916 0.8638163213133329 0.7138334829351185 0.1695946261760247 0.5548865184594425 0.5307263668545956 0.6849694750585335 0.3038312467316744 0.1088018486111768 1
+0.7546533801614157 0.1863769716880304 0.4316006937007274 0.4769875910458192 0.5771919633020173 0.7850330641677468 0.748793759014029 0.1828214236220012 0.6132763234796518 0.7975784274643083 0
+0.6353830250341901 0.3786932164606809 0.5679018316739328 0.1040817305386349 0.1566875663548406 0.5730967334596087 0.5204929854464402 0.5372681453818717 0.0586914392937339 0.0612420018513138 1
+0.0166680578117074 0.3562631356205751 0.1643117231277334 0.0396456150688968 0.0395224454638328 0.0423354268434519 0.0142454438180149 0.5395511338356394 0.0002845365898673 0.0720863766275008 1
+0.5102184953803285 0.4378492371477142 0.9462451029841566 0.8621174019994107 0.2530665117552399 0.5187252849499885 0.474170709322145 0.7239699199859956 0.632793995508936 0.285088815767248 0
+0.0802840504150962 0.423273629770965 0.2952995557275221 0.1087890477789274 0.0522291887849468 0.0730801182999368 0.0557362003408942 0.7561870445875668 0.1211972069273124 0.079768653338108 1
+0.3646619196517429 0.4965031453759028 0.5356212559852415 0.5401492363456967 0.3042607319564912 0.4574153457614728 0.4296053494165464 0.7187110232602242 0.2191393152665416 0.1756923846138254 1
+0.2492696488535895 0.1788094196297279 0.3172368048310312 0.4172587928163735 0.2613195515581284 0.2899212441431563 0.2440540186180673 0.2416685509223127 0.3306930388507797 0.0510057228569691 0
+0.1849281206058544 0.404946525023133 0.3782810869820563 0.3862124253989571 0.1854263068982325 0.2116173196424418 0.1936475678510554 0.6157504321631497 0.213740810237165 0.1145247055802923 1
+0.1111803116592936 0.0732366886400642 0.658262569871002 0.7094619132126927 0.2154003325253901 0.1214036486890638 0.0958502687819588 0.1784013245709367 0.6119766833799871 0.8450707080261888 1
+0.0712373272037342 0.6312270911190652 0.4727114439891937 0.4745003186175425 0.1220261346260424 0.0398497989565679 0.0539202832044053 0.1447910664400697 0.1038404749453997 0.1136531016263183 1
+0.4914637200074481 0.3637661303627403 0.8727155896110713 0.1880049060249549 0.5807308881365894 0.3977004619014389 0.3426642192211879 0.1356664065178225 0.7099880033221571 0.2581434160118376 0
+0.1879650972410383 0.6843649353350882 0.3442040751463059 0.3207073994641743 0.1664095938249101 0.2249227395075267 0.1813425986626459 0.6826135476765304 0.0589759758836014 0.0306615406444463 1
+0.0640140741073664 0.7704054916756926 0.2979182068803504 0.3257436122321728 0.1189880010416458 0.1843019971421925 0.1558607578340107 0.6701045214841611 0.3286013104063491 0.0820591474497138 0
+0.0302670356411359 0.7082081040950856 0.274744180583289 0.0606127049601557 0.0570301075699605 0.0660818130462233 0.0421200996459945 0.5280705465313893 0.1142837368113445 0.0651473280947008 1
+0.0790063372007165 0.0583813328933871 0.0643607796532877 0.4587818531892589 0.2408939457676461 0.2613365234439903 0.2289891176084961 0.148802707493016 0.4699698545018302 0.2556502233062843 0
+0.2611671492869847 0.99817087604278 0.0332478874601847 0.1521278855443564 0.0752789407263476 0.2566909247997874 0.215425462173856 0.2394512075039568 0.0693500261466055 0.0944507881596995 0
+0.0521807803681611 0.0619176392106792 0.2881967235768424 0.5927588167983391 0.7092673090149102 0.3005549463330343 0.267274157598007 0.1770154849344644 0.836160755483097 0.2165158814348358 0
+0.1515406396270875 0.3608610511365673 0.1023001292052151 0.0631684972900379 0.0374658627297797 0.1163127637656598 0.0826012849088763 0.506910963450303 0.0079208834476606 0.053025952176645 1
+0.3485139360629739 0.1691114761388986 0.24853003157582 0.4706700561177993 0.3928340110708253 0.4240587512045991 0.3806411433066737 0.268772656654583 0.3584853425205327 0.5235501983067912 1
+0.5568967620564121 0.1623186118742424 0.1611403223911947 0.1395613355899221 0.536728030287855 0.4711726979696276 0.4458633800970237 0.1440981466218335 0.577986096157987 0.1824963007506604 0
+0.0848940910579336 0.2717073975511259 0.1638280672419868 0.3765374152922717 0.5923158590573105 0.1014122885720931 0.0962894978366329 0.2185031473147533 0.5506936540650282 0.8636581690911671 1
+0.1809858295825922 0.4611257361327298 0.4665759236100074 0.2883865618768971 0.1458103804010336 0.1610008972186223 0.1314409335256326 0.4802955485372097 0.3677058660678581 0.1415376715336851 1
+0.6925590862103285 0.1141373349305291 0.2721946231284243 0.6383793672872287 0.7640939350840996 0.6058817665237763 0.5770551986364232 0.2436743714487859 0.7375034605801472 0.3529185219218529 0
+0.4575304821280667 0.1861617806342397 0.0947136411687889 0.2999458692777317 0.4435007311552253 0.5126175522546773 0.4606726104628294 0.1177526057432111 0.8363760804700238 0.0629244001810774 0
+0.4245089793061825 0.0815861015271391 0.791254119711741 0.2688995018603153 0.551464647476346 0.5569268600671253 0.5007211223285695 0.0851853742861102 0.4569350026146606 0.1206597163571009 0
+0.2982529358510918 0.795525460688181 0.1387194174019387 0.1399656030093941 0.0630462797887328 0.3092745821287344 0.2670250426117739 0.6415416371871832 0.1101079701005875 0.0785524617744234 1
+0.233609636140664 0.4062161522404975 0.9879086028563334 0.9661991325380456 0.1685663348089982 0.1614461834978234 0.1261046282942179 0.6561367167270843 0.3333538404749454 0.2720890792754201 0
+0.681817306272352 0.0667953030965992 0.8540257443118612 0.8451861343127112 0.4818479864853135 0.5086033296779984 0.4599842664219221 0.6089087606946703 0.4956012181242119 0.0572691094099444 0
+0.0810095860594425 0.3372330734303606 0.1099557109396051 0.2463907141829344 0.0774022956270907 0.11465789386236 0.0969188409597482 0.6832408224593548 0.0955581531268264 0.1186327304176267 1
+0.034209326664398 0.0837810502758032 0.2509483110045533 0.3819162275682972 0.6583067913970739 0.1820755657461868 0.1503998951094795 0.0533110626472453 0.7082654034267435 0.0910184253021898 0
+0.4162519984333568 0.2750356858497535 0.6070848677892088 0.554524711702514 0.0419395978980121 0.2435117801482073 0.2163694768585289 0.796989081042443 0.0321910855455412 0.0636676283588846 1
+0.4053304397516485 0.9309523638737258 0.2169403928667665 0.083190012539142 0.1264464520609228 0.292719237031868 0.2567326602858267 0.180611374096469 0.2547832907810144 0.0648838199225691 1
+0.4072116958914137 0.147032874019984 0.2906080936357794 0.2531810364320317 0.2807101887649152 0.4767753298109195 0.4525173724924609 0.455189969438589 0.5452182472546064 0.2009553860394722 0
+0.7831868350594233 0.4952335181585384 0.8876881939598288 0.8637344716772986 0.7519480779631817 0.8452530488818 0.815268126393077 0.4242128066170194 0.8149127933802947 0.2147591602872915 0
+0.1676886232158564 0.191613287330268 0.3303438793347657 0.2457877390488068 0.6168679847492371 0.1524739972751138 0.1259145142257768 0.1578617223798513 0.7106416684610415 0.0419113126085282 0
+0.1353990767077375 0.4476834683059442 0.0851234358914123 0.1091933151983994 0.0497853274840914 0.1853853055527863 0.1404156286875574 0.6329713131195248 0.0379510289458304 0.0741066059471767 1
+0.1346093343692013 0.8714448644655013 0.3202700181716424 0.2830077496008716 0.1089654988214712 0.1790183763665969 0.1542283991084305 0.1640177679229181 0.1901857947030053 0.0553840124862333 1
+0.4774987640211369 0.8638055820559354 0.0964340742480877 0.534366156650199 0.0 0.2400691190642343 0.2139242165989248 0.6193463213251543 0.050839767449014 0.0921602940480936 0
+0.0203727840664666 0.994211360653033 0.3367557745058073 0.0470389124521217 0.0420330789313782 0.0131126840128933 0.0 0.2207131968402855 0.1121996985450182 0.033627696735877 1
+0.297103636025092 0.5374755220176315 0.708134401061279 0.6246068670645389 0.1705361137263543 0.2226298474728342 0.1999213321096106 0.1626319282864456 0.1491433141591559 0.1546049742234955 1
+0.1330298496921289 0.130269490929697 0.3401344563362375 0.3424076523026113 0.7599340290993104 0.1119795301232844 0.1110397272846466 0.3139947921605239 0.4837198929527208 0.9153260406883644 0
+0.0762775526976442 0.4896385507599831 0.1103011794294241 0.0916522203874115 0.0825571068955616 0.0604127205662446 0.0665989248721646 0.619827718251508 0.0961349164846657 0.1151328013621346 0
+0.0705117915593879 0.3750134494408619 0.0521519232230828 0.0816414627628595 0.1074364162042694 0.0879805934935035 0.0779598793759014 0.3037614605290989 0.083392291365468 0.0954642811294366 1
+0.1357586341952011 0.8754761102065116 0.1697563065272815 0.135731073090179 0.1530885465702476 0.1466786295816303 0.1258555133079848 0.562096556553198 0.1268187271217201 0.1500915521982663 0
+0.2510289122743937 0.3406259190451255 0.581907124251197 0.7472232309874403 0.6858369557233763 0.1766656697570863 0.127546872951357 0.1621505313600921 0.6288335537851057 0.2456842091038695 1
+0.1956699005438307 0.6519571626342255 0.389108069452985 0.1192040728229514 0.0828842905123428 0.2295018775130429 0.1958437131244264 0.5566990758637792 0.0865606447445322 0.1591859624467071 0
+0.3356469145473107 0.3457905043360997 0.2730859318321574 0.6796352000438528 0.4441484211721186 0.2817731698401622 0.2434836764127442 0.5422498741803488 0.5881448214340645 0.6631892596771687 1
+0.1143970670382094 0.5981809182919569 0.180521104670043 0.2306037288530453 0.0763072320933741 0.193726115707972 0.1536580569031074 0.8881919169079727 0.450452182472546 0.078484895576441 0
+0.5797222418409345 0.2175079441364024 0.2170785802626942 0.4617350609484524 0.3569039081749164 0.480271159405842 0.4535203880949259 0.2057096593022661 0.6266726137377342 1.0000000000000002 0
+0.133877378055436 0.8522713415727596 0.1536851123808997 0.2878452546542144 0.0997108765182321 0.1765327484797129 0.1496459944932476 0.431338939905617 0.2374342489772063 0.138774214036202 0
+0.7981790981527734 0.2165897956402292 0.7117894576835645 0.6045784998252742 0.6593417599807698 0.8219519489582297 0.7853284384423758 0.5817317160341645 0.4874650096896245 0.8523475875489013 0
+0.2857518924923112 0.924087769257806 0.2979873005783142 0.2512282192362771 0.2142184651749765 0.3015717941049414 0.2615051789694506 0.6093245125856119 0.1986834415085053 0.1229502104687067 1
+0.0858058261154308 0.8986163215241265 0.4177335885193911 0.2842205518592875 0.1110287587721933 0.2480909181537235 0.2154910187491805 0.5918994026301777 0.0901596480974499 0.0779511226123795 1
+0.4424162263157557 0.4638156243051122 0.9354181205132276 0.6676031053219408 0.4695285217310017 0.484285381982521 0.4451094794807919 0.540032530761993 0.6746285643975516 0.1133828368343885 0
+0.1224678484978843 0.6581116267726363 0.6069466803932813 0.0373639023454362 0.0411183002477246 0.1476356627787193 0.1003671168218172 0.7746478873239436 0.013611615245009 0.0415734816186158 0
+0.8225776419449493 0.6945506452145096 0.9266570396114168 0.366999444988797 0.5741872158009655 0.5425381317914466 0.488796381277042 0.5683838921670884 0.7889199913870006 0.0824645446376087 1
+0.1426223298041053 0.8913285178357517 0.0756023243119994 0.1456733108131257 0.0894279628479664 0.1422191207257502 0.1296184607316113 0.6501192551476648 0.1399920022147712 0.136152645554482 1
+0.7436098287607467 0.0508783381512219 0.9367999944725042 0.8668932391413087 0.6116530785307452 0.6847572525171968 0.6387570473318473 0.0916842327918833 0.5387431172905965 0.5302865482456436 0
+0.5959857974792453 0.3464289044623451 0.422286863215206 0.1385540930363224 0.4875369750873046 0.4499052935898715 0.4171758227350202 0.0580812685538398 0.5668276477283215 0.0697958825158949 0
+0.5064559831007982 0.1684730760126532 0.8784434571722713 0.6313218174218702 0.1444549054172259 0.304369786993653 0.2628228661334732 0.7438093084660212 0.6597249992309823 0.4005459348796984 1
+0.6454570553525911 0.1390421128892268 0.3442040751463059 0.3152600672865434 0.3017567757056149 0.6874954308310904 0.6415169791530091 0.1059292054762546 0.5925359131317481 0.203847219313122 0
+0.0556351005155797 0.9816872413224208 0.0713945181060035 0.0874862103698019 0.0640745711557594 0.0695842887050144 0.030064245443818 0.4076192004434686 0.1690147343812482 0.0523502901968203 1
+0.3732206719872614 0.2336401001355704 0.362348080231602 0.2704480516365979 0.6884010069242736 0.3512311833316719 0.3141667759276256 0.0 0.7696176443446432 0.3280676743038993 0
diff -r 000000000000 -r 915447b14520 test-data/predictions_classification.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predictions_classification.csv Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,70 @@
diff -r 000000000000 -r 915447b14520 test-data/predictions_regression.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predictions_regression.csv Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,399 @@
diff -r 000000000000 -r 915447b14520 utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py Wed Dec 11 05:00:00 2024 +0000
@@ -0,0 +1,157 @@
+import base64
+import logging
+LOG = logging.getLogger(__name__)
+def get_html_template() -> str:
+ """Return the fixed opening text of the "Model Training Report" page.
+
+ NOTE(review): only the title text is visible inside the literal below;
+ presumably the real template wraps it in HTML markup -- confirm against
+ the original file before relying on the exact string.
+ """
+ return """
+ Model Training Report
+ """
+def get_html_closing() -> str:
+ """Return the fixed string that closes the report.
+
+ NOTE(review): the literal below holds only whitespace in this view;
+ presumably it should carry the closing markup that matches
+ get_html_template() -- confirm against the original file.
+ """
+ return """
+ """
+def customize_figure_layout(fig, margin_dict=None):
+ """
+ Update the layout of a Plotly figure to reduce margins.
+ Parameters:
+ fig (plotly.graph_objects.Figure): The Plotly figure to customize.
+ margin_dict (dict, optional): A dictionary specifying margin sizes.
+ Example: {'l': 10, 'r': 10, 't': 10, 'b': 10}
+ Returns:
+ plotly.graph_objects.Figure: The updated Plotly figure.
+ """
+ if margin_dict is None:
+ # Set default smaller margins
+ margin_dict = {'l': 40, 'r': 40, 't': 40, 'b': 40}
+ fig.update_layout(margin=margin_dict)
+ return fig
+def add_plot_to_html(fig, include_plotlyjs=True):
+ custom_margin = {'l': 40, 'r': 40, 't': 60, 'b': 60}
+ fig = customize_figure_layout(fig, margin_dict=custom_margin)
+ return fig.to_html(full_html=False,
+ default_height=350,
+ include_plotlyjs="cdn" if include_plotlyjs else False)
+def add_hr_to_html():
+ return "
+def encode_image_to_base64(image_path):
+ """Convert an image file to a base64 encoded string."""
+ with open(image_path, "rb") as img_file:
+ return base64.b64encode(img_file.read()).decode("utf-8")