Galaxy |

Changeset 0:915447b14520 (2024-12-11)

Next changeset 1:f6def1b90150 (2024-12-11)

Commit message:
planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1

added:
base_model_trainer.py
dashboard.py
feature_importance.py
pycaret_classification.py
pycaret_macros.xml
pycaret_predict.py
pycaret_regression.py
pycaret_train.py
pycaret_train.xml
test-data/auto-mpg.tsv
test-data/evaluation_report_classification.html
test-data/evaluation_report_regression.html
test-data/expected_best_model_classification.csv
test-data/expected_best_model_classification_customized.csv
test-data/expected_best_model_regression.csv
test-data/expected_comparison_result_classification.html
test-data/expected_comparison_result_classification_customized.html
test-data/expected_comparison_result_regression.html
test-data/expected_model_classification.h5
test-data/expected_model_classification_customized.h5
test-data/expected_model_regression.h5
test-data/pcr.tsv
test-data/predictions_classification.csv
test-data/predictions_regression.csv
utils.py

diff -r 000000000000 -r 915447b14520 base_model_trainer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/base_model_trainer.py Wed Dec 11 05:00:00 2024 +0000

[

b'@@ -0,0 +1,359 @@\n+import base64\n+import logging\n+import os\n+import tempfile\n+\n+from feature_importance import FeatureImportanceAnalyzer\n+\n+import h5py\n+\n+import joblib\n+\n+import numpy as np\n+\n+import pandas as pd\n+\n+from sklearn.metrics import average_precision_score\n+\n+from utils import get_html_closing, get_html_template\n+\n+logging.basicConfig(level=logging.DEBUG)\n+LOG = logging.getLogger(__name__)\n+\n+\n+class BaseModelTrainer:\n+\n+ def __init__(\n+ self,\n+ input_file,\n+ target_col,\n+ output_dir,\n+ task_type,\n+ random_seed,\n+ test_file=None,\n+ **kwargs\n+ ):\n+ self.exp = None # This will be set in the subclass\n+ self.input_file = input_file\n+ self.target_col = target_col\n+ self.output_dir = output_dir\n+ self.task_type = task_type\n+ self.random_seed = random_seed\n+ self.data = None\n+ self.target = None\n+ self.best_model = None\n+ self.results = None\n+ self.features_name = None\n+ self.plots = {}\n+ self.expaliner = None\n+ self.plots_explainer_html = None\n+ self.trees = []\n+ for key, value in kwargs.items():\n+ setattr(self, key, value)\n+ self.setup_params = {}\n+ self.test_file = test_file\n+ self.test_data = None\n+\n+ LOG.info(f"Model kwargs: {self.__dict__}")\n+\n+ def load_data(self):\n+ LOG.info(f"Loading data from {self.input_file}")\n+ self.data = pd.read_csv(self.input_file, sep=None, engine=\'python\')\n+ self.data.columns = self.data.columns.str.replace(\'.\', \'_\')\n+\n+ numeric_cols = self.data.select_dtypes(include=[\'number\']).columns\n+ non_numeric_cols = self.data.select_dtypes(exclude=[\'number\']).columns\n+\n+ self.data[numeric_cols] = self.data[numeric_cols].apply(\n+ pd.to_numeric, errors=\'coerce\')\n+\n+ if len(non_numeric_cols) > 0:\n+ LOG.info(f"Non-numeric columns found: {non_numeric_cols.tolist()}")\n+\n+ names = self.data.columns.to_list()\n+ target_index = int(self.target_col)-1\n+ self.target = names[target_index]\n+ self.features_name = [name\n+ for i, name in enumerate(names)\n+ if i 
!= target_index]\n+ if hasattr(self, \'missing_value_strategy\'):\n+ if self.missing_value_strategy == \'mean\':\n+ self.data = self.data.fillna(\n+ self.data.mean(numeric_only=True))\n+ elif self.missing_value_strategy == \'median\':\n+ self.data = self.data.fillna(\n+ self.data.median(numeric_only=True))\n+ elif self.missing_value_strategy == \'drop\':\n+ self.data = self.data.dropna()\n+ else:\n+ # Default strategy if not specified\n+ self.data = self.data.fillna(self.data.median(numeric_only=True))\n+\n+ if self.test_file:\n+ LOG.info(f"Loading test data from {self.test_file}")\n+ self.test_data = pd.read_csv(\n+ self.test_file, sep=None, engine=\'python\')\n+ self.test_data = self.test_data[numeric_cols].apply(\n+ pd.to_numeric, errors=\'coerce\')\n+ self.test_data.columns = self.test_data.columns.str.replace(\n+ \'.\', \'_\'\n+ )\n+\n+ def setup_pycaret(self):\n+ LOG.info("Initializing PyCaret")\n+ self.setup_params = {\n+ \'target\': self.target,\n+ \'session_id\': self.random_seed,\n+ \'html\': True,\n+ \'log_experiment\': False,\n+ \'system_log\': False,\n+ \'index\': False,\n+ }\n+\n+ if self.test_data is not None:\n+ self.setup_params[\'test_data\'] = self.test_data\n+\n+ if hasattr(self, \'train_size\') and self.train_size is not None \\\n+ and self.test_data is None:\n+ self.setup_params[\'train_size\'] = self.tra'..b'\n+ Best Model Plots</div>\n+ <div class="tab" onclick="openTab(event, \'feature\')">\n+ Feature Importance</div>\n+ <div class="tab" onclick="openTab(event, \'explainer\')">\n+ Explainer\n+ </div>\n+ </div>\n+ <div id="summary" class="tab-content">\n+ <h2>Setup Parameters</h2>\n+ <table>\n+ <tr><th>Parameter</th><th>Value</th></tr>\n+ {setup_params_table.to_html(\n+ index=False, header=False, classes=\'table\')}\n+ </table>\n+ <h5>If you want to know all the experiment setup parameters,\n+ please check the PyCaret documentation for\n+ the classification/regression <code>exp</code> function.</h5>\n+ <h2>Best Model: {model_name}</h2>\n+ 
<table>\n+ <tr><th>Parameter</th><th>Value</th></tr>\n+ {best_model_params.to_html(\n+ index=False, header=False, classes=\'table\')}\n+ </table>\n+ <h2>Comparison Results on the Cross-Validation Set</h2>\n+ <table>\n+ {self.results.to_html(index=False, classes=\'table\')}\n+ </table>\n+ <h2>Results on the Test Set for the best model</h2>\n+ <table>\n+ {self.test_result_df.to_html(index=False, classes=\'table\')}\n+ </table>\n+ </div>\n+ <div id="plots" class="tab-content">\n+ <h2>Best Model Plots on the testing set</h2>\n+ {plots_html}\n+ </div>\n+ <div id="feature" class="tab-content">\n+ {feature_importance_html}\n+ </div>\n+ <div id="explainer" class="tab-content">\n+ {self.plots_explainer_html}\n+ {tree_plots}\n+ </div>\n+ {get_html_closing()}\n+ """\n+\n+ with open(os.path.join(\n+ self.output_dir, "comparison_result.html"), "w") as file:\n+ file.write(html_content)\n+\n+ def save_dashboard(self):\n+ raise NotImplementedError("Subclasses should implement this method")\n+\n+ def generate_plots_explainer(self):\n+ raise NotImplementedError("Subclasses should implement this method")\n+\n+ # not working now\n+ def generate_tree_plots(self):\n+ from sklearn.ensemble import RandomForestClassifier, \\\n+ RandomForestRegressor\n+ from xgboost import XGBClassifier, XGBRegressor\n+ from explainerdashboard.explainers import RandomForestExplainer\n+\n+ LOG.info("Generating tree plots")\n+ X_test = self.exp.X_test_transformed.copy()\n+ y_test = self.exp.y_test_transformed\n+\n+ is_rf = isinstance(self.best_model, RandomForestClassifier) or \\\n+ isinstance(self.best_model, RandomForestRegressor)\n+\n+ is_xgb = isinstance(self.best_model, XGBClassifier) or \\\n+ isinstance(self.best_model, XGBRegressor)\n+\n+ try:\n+ if is_rf:\n+ num_trees = self.best_model.n_estimators\n+ if is_xgb:\n+ num_trees = len(self.best_model.get_booster().get_dump())\n+ explainer = RandomForestExplainer(self.best_model, X_test, y_test)\n+ for i in range(num_trees):\n+ fig = 
explainer.decisiontree_encoded(tree_idx=i, index=0)\n+ LOG.info(f"Tree {i+1}")\n+ LOG.info(fig)\n+ self.trees.append(fig)\n+ except Exception as e:\n+ LOG.error(f"Error generating tree plots: {e}")\n+\n+ def run(self):\n+ self.load_data()\n+ self.setup_pycaret()\n+ self.train_model()\n+ self.save_model()\n+ self.generate_plots()\n+ self.generate_plots_explainer()\n+ self.generate_tree_plots()\n+ self.save_html_report()\n+ # self.save_dashboard()\n'

diff -r 000000000000 -r 915447b14520 dashboard.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dashboard.py Wed Dec 11 05:00:00 2024 +0000

[

@@ -0,0 +1,159 @@
+import logging
+from typing import Any, Dict, Optional
+
+from pycaret.utils.generic import get_label_encoder
+
+# Configure the root logger at DEBUG level; this runs when the module is
+# imported.
+logging.basicConfig(level=logging.DEBUG)
+# Module-level logger named after this module.
+LOG = logging.getLogger(__name__)
+
+
+def generate_classifier_explainer_dashboard(
+        exp,
+        estimator,
+        display_format: str = "dash",
+        dashboard_kwargs: Optional[Dict[str, Any]] = None,
+        run_kwargs: Optional[Dict[str, Any]] = None,
+        **kwargs,):
+
+    """
+        This function is changed from pycaret.classification.oop.dashboard()
+
+        This function generates the interactive dashboard for a trained model.
+        The dashboard is implemented using
+        ExplainerDashboard (explainerdashboard.readthedocs.io)
+
+
+        estimator: scikit-learn compatible object
+            Trained model object
+
+
+        display_format: str, default = 'dash'
+            Render mode for the dashboard. The default is set to ``dash``
+            which will
+            render a dashboard in browser. There are four possible options:
+
+            - 'dash' - displays the dashboard in browser
+            - 'inline' - displays the dashboard in the jupyter notebook cell.
+            - 'jupyterlab' - displays the dashboard in jupyterlab pane.
+            - 'external' - displays the dashboard in a separate tab.
+                (use in Colab)
+
+
+        dashboard_kwargs: dict, default = {} (empty dict)
+            Dictionary of arguments passed to the ``ExplainerDashboard`` class.
+
+
+        run_kwargs: dict, default = {} (empty dict)
+            Dictionary of arguments passed to the ``run``
+            method of ``ExplainerDashboard``.
+
+
+        **kwargs:
+            Additional keyword arguments to pass to the ``ClassifierExplainer``
+            or ``RegressionExplainer`` class.
+
+
+        Returns:
+            ExplainerDashboard
+    """
+
+    dashboard_kwargs = dashboard_kwargs or {}
+    run_kwargs = run_kwargs or {}
+
+    from explainerdashboard import ClassifierExplainer, ExplainerDashboard
+
+    le = get_label_encoder(exp.pipeline)
+    if le:
+        labels_ = list(le.classes_)
+    else:
+        labels_ = None
+
+    # Replaceing chars which dash doesnt accept for column name `.` , `{`, `}`
+
+    X_test_df = exp.X_test_transformed.copy()
+    LOG.info(X_test_df)
+    X_test_df.columns = [
+        col.replace(".", "__").replace("{", "__").replace("}", "__")
+        for col in X_test_df.columns
+    ]
+
+    explainer = ClassifierExplainer(
+        estimator, X_test_df, exp.y_test_transformed, labels=labels_, **kwargs
+    )
+    return ExplainerDashboard(
+        explainer, mode=display_format,
+        contributions=False, whatif=False,
+        **dashboard_kwargs
+    )
+
+
+def generate_regression_explainer_dashboard(
+        exp,
+        estimator,
+        display_format: str = "dash",
+        dashboard_kwargs: Optional[Dict[str, Any]] = None,
+        run_kwargs: Optional[Dict[str, Any]] = None,
+        **kwargs,):
+
+    """
+    This function is changed from pycaret.regression.oop.dashboard()
+
+        This function generates the interactive dashboard for a trained model.
+        The dashboard is implemented using ExplainerDashboard
+        (explainerdashboard.readthedocs.io)
+
+
+        estimator: scikit-learn compatible object
+            Trained model object
+
+
+        display_format: str, default = 'dash'
+            Render mode for the dashboard. The default is set to ``dash``
+            which will
+            render a dashboard in browser. There are four possible options:
+
+            - 'dash' - displays the dashboard in browser
+            - 'inline' - displays the dashboard in the jupyter notebook cell.
+            - 'jupyterlab' - displays the dashboard in jupyterlab pane.
+            - 'external' - displays the dashboard in a separate tab.
+            (use in Colab)
+
+
+        dashboard_kwargs: dict, default = {} (empty dict)
+            Dictionary of arguments passed to the ``ExplainerDashboard`` class.
+
+
+        run_kwargs: dict, default = {} (empty dict)
+            Dictionary of arguments passed to the ``run`` method
+            of ``ExplainerDashboard``.
+
+
+        **kwargs:
+            Additional keyword arguments to pass to the
+            ``ClassifierExplainer`` or
+            ``RegressionExplainer`` class.
+
+
+        Returns:
+            ExplainerDashboard
+    """
+
+    dashboard_kwargs = dashboard_kwargs or {}
+    run_kwargs = run_kwargs or {}
+
+    from explainerdashboard import ExplainerDashboard, RegressionExplainer
+
+    # Replaceing chars which dash doesnt accept for column name `.` , `{`, `}`
+    X_test_df = exp.X_test_transformed.copy()
+    X_test_df.columns = [
+        col.replace(".", "__").replace("{", "__").replace("}", "__")
+        for col in X_test_df.columns
+    ]
+    explainer = RegressionExplainer(
+        estimator, X_test_df, exp.y_test_transformed, **kwargs
+    )
+    return ExplainerDashboard(
+        explainer, mode=display_format, contributions=False,
+        whatif=False, shap_interaction=False, decision_trees=False,
+        **dashboard_kwargs
+    )

diff -r 000000000000 -r 915447b14520 feature_importance.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/feature_importance.py Wed Dec 11 05:00:00 2024 +0000

[

@@ -0,0 +1,171 @@
+import base64
+import logging
+import os
+
+import matplotlib.pyplot as plt
+
+import pandas as pd
+
+from pycaret.classification import ClassificationExperiment
+from pycaret.regression import RegressionExperiment
+
+# Configure the root logger at DEBUG level; this runs when the module is
+# imported.
+logging.basicConfig(level=logging.DEBUG)
+# Module-level logger named after this module.
+LOG = logging.getLogger(__name__)
+
+
+class FeatureImportanceAnalyzer:
+    def __init__(
+            self,
+            task_type,
+            output_dir,
+            data_path=None,
+            data=None,
+            target_col=None):
+
+        if data is not None:
+            self.data = data
+            LOG.info("Data loaded from memory")
+        else:
+            self.target_col = target_col
+            self.data = pd.read_csv(data_path, sep=None, engine='python')
+            self.data.columns = self.data.columns.str.replace('.', '_')
+            self.data = self.data.fillna(self.data.median(numeric_only=True))
+        self.task_type = task_type
+        self.target = self.data.columns[int(target_col) - 1]
+        self.exp = ClassificationExperiment() \
+            if task_type == 'classification' \
+            else RegressionExperiment()
+        self.plots = {}
+        self.output_dir = output_dir
+
+    def setup_pycaret(self):
+        LOG.info("Initializing PyCaret")
+        setup_params = {
+            'target': self.target,
+            'session_id': 123,
+            'html': True,
+            'log_experiment': False,
+            'system_log': False
+        }
+        LOG.info(self.task_type)
+        LOG.info(self.exp)
+        self.exp.setup(self.data, **setup_params)
+
+    # def save_coefficients(self):
+    #     model = self.exp.create_model('lr')
+    #     coef_df = pd.DataFrame({
+    #         'Feature': self.data.columns.drop(self.target),
+    #         'Coefficient': model.coef_[0]
+    #     })
+    #     coef_html = coef_df.to_html(index=False)
+    #     return coef_html
+
+    def save_tree_importance(self):
+        model = self.exp.create_model('rf')
+        importances = model.feature_importances_
+        processed_features = self.exp.get_config('X_transformed').columns
+        LOG.debug(f"Feature importances: {importances}")
+        LOG.debug(f"Features: {processed_features}")
+        feature_importances = pd.DataFrame({
+            'Feature': processed_features,
+            'Importance': importances
+        }).sort_values(by='Importance', ascending=False)
+        plt.figure(figsize=(10, 6))
+        plt.barh(
+            feature_importances['Feature'],
+            feature_importances['Importance'])
+        plt.xlabel('Importance')
+        plt.title('Feature Importance (Random Forest)')
+        plot_path = os.path.join(
+            self.output_dir,
+            'tree_importance.png')
+        plt.savefig(plot_path)
+        plt.close()
+        self.plots['tree_importance'] = plot_path
+
+    def save_shap_values(self):
+        model = self.exp.create_model('lightgbm')
+        import shap
+        explainer = shap.Explainer(model)
+        shap_values = explainer.shap_values(
+            self.exp.get_config('X_transformed'))
+        shap.summary_plot(shap_values,
+                          self.exp.get_config('X_transformed'), show=False)
+        plt.title('Shap (LightGBM)')
+        plot_path = os.path.join(
+            self.output_dir, 'shap_summary.png')
+        plt.savefig(plot_path)
+        plt.close()
+        self.plots['shap_summary'] = plot_path
+
+    def generate_feature_importance(self):
+        # coef_html = self.save_coefficients()
+        self.save_tree_importance()
+        self.save_shap_values()
+
+    def encode_image_to_base64(self, img_path):
+        with open(img_path, 'rb') as img_file:
+            return base64.b64encode(img_file.read()).decode('utf-8')
+
+    def generate_html_report(self):
+        LOG.info("Generating HTML report")
+
+        # Read and encode plot images
+        plots_html = ""
+        for plot_name, plot_path in self.plots.items():
+            encoded_image = self.encode_image_to_base64(plot_path)
+            plots_html += f"""
+            <div class="plot" id="{plot_name}">
+                <h2>{'Feature importance analysis from a'
+                    'trained Random Forest'
+                    if plot_name == 'tree_importance'
+                    else 'SHAP Summary from a trained lightgbm'}</h2>
+                <h3>{'Use gini impurity for'
+                    'calculating feature importance for classification'
+                    'and Variance Reduction for regression'
+                  if plot_name == 'tree_importance'
+                  else ''}</h3>
+                <img src="data:image/png;base64,
+                {encoded_image}" alt="{plot_name}">
+            </div>
+            """
+
+        # Generate HTML content with tabs
+        html_content = f"""
+            <h1>PyCaret Feature Importance Report</h1>
+            {plots_html}
+        """
+
+        return html_content
+
+    def run(self):
+        LOG.info("Running feature importance analysis")
+        self.setup_pycaret()
+        self.generate_feature_importance()
+        html_content = self.generate_html_report()
+        LOG.info("Feature importance analysis completed")
+        return html_content
+
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Feature Importance Analysis")
+    parser.add_argument(
+        "--data_path", type=str, help="Path to the dataset")
+    parser.add_argument(
+        "--target_col", type=int,
+        help="Index of the target column (1-based)")
+    parser.add_argument(
+        "--task_type", type=str,
+        choices=["classification", "regression"],
+        help="Task type: classification or regression")
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        help="Directory to save the outputs")
+    args = parser.parse_args()
+
+    analyzer = FeatureImportanceAnalyzer(
+        args.data_path, args.target_col,
+        args.task_type, args.output_dir)
+    analyzer.run()

diff -r 000000000000 -r 915447b14520 pycaret_classification.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_classification.py Wed Dec 11 05:00:00 2024 +0000

[

b'@@ -0,0 +1,204 @@\n+import logging\n+\n+from base_model_trainer import BaseModelTrainer\n+\n+from dashboard import generate_classifier_explainer_dashboard\n+\n+from pycaret.classification import ClassificationExperiment\n+\n+from utils import add_hr_to_html, add_plot_to_html\n+\n+LOG = logging.getLogger(__name__)\n+\n+\n+class ClassificationModelTrainer(BaseModelTrainer):\n+ def __init__(\n+ self,\n+ input_file,\n+ target_col,\n+ output_dir,\n+ task_type,\n+ random_seed,\n+ test_file=None,\n+ **kwargs):\n+ super().__init__(\n+ input_file,\n+ target_col,\n+ output_dir,\n+ task_type,\n+ random_seed,\n+ test_file,\n+ **kwargs)\n+ self.exp = ClassificationExperiment()\n+\n+ def save_dashboard(self):\n+ LOG.info("Saving explainer dashboard")\n+ dashboard = generate_classifier_explainer_dashboard(self.exp,\n+ self.best_model)\n+ dashboard.save_html("dashboard.html")\n+\n+ def generate_plots(self):\n+ LOG.info("Generating and saving plots")\n+ plots = [\'confusion_matrix\', \'auc\', \'threshold\', \'pr\',\n+ \'error\', \'class_report\', \'learning\', \'calibration\',\n+ \'vc\', \'dimension\', \'manifold\', \'rfe\', \'feature\',\n+ \'feature_all\']\n+ for plot_name in plots:\n+ try:\n+ if plot_name == \'auc\' and not self.exp.is_multiclass:\n+ plot_path = self.exp.plot_model(self.best_model,\n+ plot=plot_name,\n+ save=True,\n+ plot_kwargs={\n+ \'micro\': False,\n+ \'macro\': False,\n+ \'per_class\': False,\n+ \'binary\': True\n+ }\n+ )\n+ self.plots[plot_name] = plot_path\n+ continue\n+\n+ plot_path = self.exp.plot_model(self.best_model,\n+ plot=plot_name, save=True)\n+ self.plots[plot_name] = plot_path\n+ except Exception as e:\n+ LOG.error(f"Error generating plot {plot_name}: {e}")\n+ continue\n+\n+ def generate_plots_explainer(self):\n+ LOG.info("Generating and saving plots from explainer")\n+\n+ from explainerdashboard import ClassifierExplainer\n+\n+ X_test = self.exp.X_test_transformed.copy()\n+ y_test = self.exp.y_test_transformed\n+\n+ explainer = 
ClassifierExplainer(self.best_model, X_test, y_test)\n+ self.expaliner = explainer\n+ plots_explainer_html = ""\n+\n+ try:\n+ fig_importance = explainer.plot_importances()\n+ plots_explainer_html += add_plot_to_html(fig_importance)\n+ plots_explainer_html += add_hr_to_html()\n+ except Exception as e:\n+ LOG.error(f"Error generating plot importance(mean shap): {e}")\n+\n+ try:\n+ fig_importance_perm = explainer.plot_importances(\n+ kind="permutation")\n+ plots_explainer_html += add_plot_to_html(fig_importance_perm)\n+ plots_explainer_html += add_hr_to_html()\n+ except Exception as e:\n+ LOG.error(f"Error generating plot importance(permutation): {e}")\n+\n+ # try:\n+ # fig_shap = explainer.plot_shap_summary()\n+ # plots_explainer_html += add_plot_to_html(fig_shap,\n+ # include_plotlyjs=False)\n+ # except Exception as e:\n+ # LOG.error(f"Error generating plot shap: {e}")\n+\n+ # tr'..b'er.plot_dependence(col=feature)\n+ # plots_explainer_html += add_plot_to_html(fig_dependence)\n+ # except Exception as e:\n+ # LOG.error(f"Error generating plot dependencies: {e}")\n+\n+ try:\n+ for feature in self.features_name:\n+ fig_pdp = explainer.plot_pdp(feature)\n+ plots_explainer_html += add_plot_to_html(fig_pdp)\n+ plots_explainer_html += add_hr_to_html()\n+ except Exception as e:\n+ LOG.error(f"Error generating plot pdp: {e}")\n+\n+ try:\n+ for feature in self.features_name:\n+ fig_interaction = explainer.plot_interaction(\n+ col=feature, interact_col=feature)\n+ plots_explainer_html += add_plot_to_html(fig_interaction)\n+ except Exception as e:\n+ LOG.error(f"Error generating plot interactions: {e}")\n+\n+ try:\n+ for feature in self.features_name:\n+ fig_interactions_importance = \\\n+ explainer.plot_interactions_importance(\n+ col=feature)\n+ plots_explainer_html += add_plot_to_html(\n+ fig_interactions_importance)\n+ plots_explainer_html += add_hr_to_html()\n+ except Exception as e:\n+ LOG.error(f"Error generating plot interactions importance: {e}")\n+\n+ # try:\n+ # for 
feature in self.features_name:\n+ # fig_interactions_detailed = \\\n+ # explainer.plot_interactions_detailed(\n+ # col=feature)\n+ # plots_explainer_html += add_plot_to_html(\n+ # fig_interactions_detailed)\n+ # except Exception as e:\n+ # LOG.error(f"Error generating plot interactions detailed: {e}")\n+\n+ try:\n+ fig_precision = explainer.plot_precision()\n+ plots_explainer_html += add_plot_to_html(fig_precision)\n+ plots_explainer_html += add_hr_to_html()\n+ except Exception as e:\n+ LOG.error(f"Error generating plot precision: {e}")\n+\n+ try:\n+ fig_cumulative_precision = explainer.plot_cumulative_precision()\n+ plots_explainer_html += add_plot_to_html(fig_cumulative_precision)\n+ plots_explainer_html += add_hr_to_html()\n+ except Exception as e:\n+ LOG.error(f"Error generating plot cumulative precision: {e}")\n+\n+ try:\n+ fig_classification = explainer.plot_classification()\n+ plots_explainer_html += add_plot_to_html(fig_classification)\n+ plots_explainer_html += add_hr_to_html()\n+ except Exception as e:\n+ LOG.error(f"Error generating plot classification: {e}")\n+\n+ try:\n+ fig_confusion_matrix = explainer.plot_confusion_matrix()\n+ plots_explainer_html += add_plot_to_html(fig_confusion_matrix)\n+ plots_explainer_html += add_hr_to_html()\n+ except Exception as e:\n+ LOG.error(f"Error generating plot confusion matrix: {e}")\n+\n+ try:\n+ fig_lift_curve = explainer.plot_lift_curve()\n+ plots_explainer_html += add_plot_to_html(fig_lift_curve)\n+ plots_explainer_html += add_hr_to_html()\n+ except Exception as e:\n+ LOG.error(f"Error generating plot lift curve: {e}")\n+\n+ try:\n+ fig_roc_auc = explainer.plot_roc_auc()\n+ plots_explainer_html += add_plot_to_html(fig_roc_auc)\n+ plots_explainer_html += add_hr_to_html()\n+ except Exception as e:\n+ LOG.error(f"Error generating plot roc auc: {e}")\n+\n+ try:\n+ fig_pr_auc = explainer.plot_pr_auc()\n+ plots_explainer_html += add_plot_to_html(fig_pr_auc)\n+ plots_explainer_html += add_hr_to_html()\n+ except 
Exception as e:\n+ LOG.error(f"Error generating plot pr auc: {e}")\n+\n+ self.plots_explainer_html = plots_explainer_html\n'

diff -r 000000000000 -r 915447b14520 pycaret_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_macros.xml Wed Dec 11 05:00:00 2024 +0000

@@ -0,0 +1,25 @@
+<macros>
+    <!-- Version tokens: @VERSION@ is @PYCARET_VERSION@ and @SUFFIX@ joined by "+". -->
+    <token name="@PYCARET_VERSION@">3.3.2</token>
+    <token name="@SUFFIX@">0</token>
+    <token name="@VERSION@">@PYCARET_VERSION@+@SUFFIX@</token>
+    <!-- NOTE(review): presumably the Galaxy tool profile version; confirm against the tool XML that uses it. -->
+    <token name="@PROFILE@">21.05</token>
+    <!-- Requirements macro: a single docker container. -->
+    <!-- NOTE(review): the image tag repeats 3.3.2 literally instead of using @PYCARET_VERSION@; keep the two in sync. -->
+    <xml name="python_requirements">
+        <requirements>
+            <container type="docker">quay.io/goeckslab/galaxy-pycaret:3.3.2</container>
+        </requirements>
+    </xml>
+    <!-- Citations macro: one BibTeX entry for PyCaret. -->
+    <!-- NOTE(review): the note field says version 1.0.0 while @PYCARET_VERSION@ is 3.3.2; confirm this is intended. -->
+    <xml name="macro_citations">
+        <citations>
+            <citation type="bibtex">@Manual{PyCaret,
+  author  = {Moez Ali},
+  title   = {PyCaret: An open source, low-code machine learning library in Python},
+  year    = {2020},
+  month   = {April},
+  note    = {PyCaret version 1.0.0},
+  url     = {https://www.pycaret.org}
+}
+            </citation>
+        </citations>
+    </xml>
+
+</macros>
\ No newline at end of file

diff -r 000000000000 -r 915447b14520 pycaret_predict.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_predict.py Wed Dec 11 05:00:00 2024 +0000

[

@@ -0,0 +1,200 @@
+import argparse
+import logging
+import tempfile
+
+import h5py
+
+import joblib
+
+import pandas as pd
+
+from pycaret.classification import ClassificationExperiment
+from pycaret.regression import RegressionExperiment
+
+from sklearn.metrics import average_precision_score
+
+from utils import encode_image_to_base64, get_html_closing, get_html_template
+
+# Module-level logger named after this module.
+LOG = logging.getLogger(__name__)
+
+
+class PyCaretModelEvaluator:
+    def __init__(self, model_path, task, target):
+        self.model_path = model_path
+        self.task = task.lower()
+        self.model = self.load_h5_model()
+        self.target = target if target != "None" else None
+
+    def load_h5_model(self):
+        """Load a PyCaret model from an HDF5 file."""
+        with h5py.File(self.model_path, 'r') as f:
+            model_bytes = bytes(f['model'][()])
+            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                temp_file.write(model_bytes)
+                temp_file.seek(0)
+                loaded_model = joblib.load(temp_file.name)
+        return loaded_model
+
+    def evaluate(self, data_path):
+        """Evaluate the model using the specified data."""
+        raise NotImplementedError("Subclasses must implement this method")
+
+
+class ClassificationEvaluator(PyCaretModelEvaluator):
+    def evaluate(self, data_path):
+        metrics = None
+        plot_paths = {}
+        data = pd.read_csv(data_path, engine='python', sep=None)
+        if self.target:
+            exp = ClassificationExperiment()
+            names = data.columns.to_list()
+            LOG.error(f"Column names: {names}")
+            target_index = int(self.target)-1
+            target_name = names[target_index]
+            exp.setup(data, target=target_name, test_data=data, index=False)
+            exp.add_metric(id='PR-AUC-Weighted',
+                           name='PR-AUC-Weighted',
+                           target='pred_proba',
+                           score_func=average_precision_score,
+                           average='weighted')
+            predictions = exp.predict_model(self.model)
+            metrics = exp.pull()
+            plots = ['confusion_matrix', 'auc', 'threshold', 'pr',
+                     'error', 'class_report', 'learning', 'calibration',
+                     'vc', 'dimension', 'manifold', 'rfe', 'feature',
+                     'feature_all']
+            for plot_name in plots:
+                try:
+                    if plot_name == 'auc' and not exp.is_multiclass:
+                        plot_path = exp.plot_model(self.model,
+                                                   plot=plot_name,
+                                                   save=True,
+                                                   plot_kwargs={
+                                                       'micro': False,
+                                                       'macro': False,
+                                                       'per_class': False,
+                                                       'binary': True
+                                                    })
+                        plot_paths[plot_name] = plot_path
+                        continue
+
+                    plot_path = exp.plot_model(self.model,
+                                               plot=plot_name, save=True)
+                    plot_paths[plot_name] = plot_path
+                except Exception as e:
+                    LOG.error(f"Error generating plot {plot_name}: {e}")
+                    continue
+            generate_html_report(plot_paths, metrics)
+
+        else:
+            exp = ClassificationExperiment()
+            exp.setup(data, target=None, test_data=data, index=False)
+            predictions = exp.predict_model(self.model, data=data)
+
+        return predictions, metrics, plot_paths
+
+
+class RegressionEvaluator(PyCaretModelEvaluator):
+    def evaluate(self, data_path):
+        metrics = None
+        plot_paths = {}
+        data = pd.read_csv(data_path, engine='python', sep=None)
+        if self.target:
+            names = data.columns.to_list()
+            target_index = int(self.target)-1
+            target_name = names[target_index]
+            exp = RegressionExperiment()
+            exp.setup(data, target=target_name, test_data=data, index=False)
+            predictions = exp.predict_model(self.model)
+            metrics = exp.pull()
+            plots = ['residuals', 'error', 'cooks',
+                     'learning', 'vc', 'manifold',
+                     'rfe', 'feature', 'feature_all']
+            for plot_name in plots:
+                try:
+                    plot_path = exp.plot_model(self.model,
+                                               plot=plot_name, save=True)
+                    plot_paths[plot_name] = plot_path
+                except Exception as e:
+                    LOG.error(f"Error generating plot {plot_name}: {e}")
+                    continue
+            generate_html_report(plot_paths, metrics)
+        else:
+            exp = RegressionExperiment()
+            exp.setup(data, target=None, test_data=data, index=False)
+            predictions = exp.predict_model(self.model, data=data)
+
+        return predictions, metrics, plot_paths
+
+
+def generate_html_report(plots, metrics):
+    """Generate an HTML evaluation report."""
+    plots_html = ""
+    for plot_name, plot_path in plots.items():
+        encoded_image = encode_image_to_base64(plot_path)
+        plots_html += f"""
+        <div class="plot">
+            <h3>{plot_name.capitalize()}</h3>
+            <img src="data:image/png;base64,{encoded_image}" alt="{plot_name}">
+        </div>
+        <hr>
+        """
+
+    metrics_html = metrics.to_html(index=False, classes="table")
+
+    html_content = f"""
+    {get_html_template()}
+    <h1>Model Evaluation Report</h1>
+    <div class="tabs">
+        <div class="tab" onclick="openTab(event, 'metrics')">Metrics</div>
+        <div class="tab" onclick="openTab(event, 'plots')">Plots</div>
+    </div>
+    <div id="metrics" class="tab-content">
+        <h2>Metrics</h2>
+        <table>
+            {metrics_html}
+        </table>
+    </div>
+    <div id="plots" class="tab-content">
+        <h2>Plots</h2>
+        {plots_html}
+    </div>
+    {get_html_closing()}
+    """
+
+    # Save HTML report
+    with open("evaluation_report.html", "w") as html_file:
+        html_file.write(html_content)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Evaluate a PyCaret model stored in HDF5 format.")
+    parser.add_argument("--model_path",
+                        type=str,
+                        help="Path to the HDF5 model file.")
+    parser.add_argument("--data_path",
+                        type=str,
+                        help="Path to the evaluation data CSV file.")
+    parser.add_argument("--task",
+                        type=str,
+                        choices=["classification", "regression"],
+                        help="Specify the task: classification or regression.")
+    parser.add_argument("--target",
+                        default=None,
+                        help="Column number of the target")
+    args = parser.parse_args()
+
+    if args.task == "classification":
+        evaluator = ClassificationEvaluator(
+            args.model_path, args.task, args.target)
+    elif args.task == "regression":
+        evaluator = RegressionEvaluator(
+            args.model_path, args.task, args.target)
+    else:
+        raise ValueError(
+            "Unsupported task type. Use 'classification' or 'regression'.")
+
+    predictions, metrics, plots = evaluator.evaluate(args.data_path)
+
+    predictions.to_csv("predictions.csv", index=False)

diff -r 000000000000 -r 915447b14520 pycaret_regression.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_regression.py Wed Dec 11 05:00:00 2024 +0000

[

@@ -0,0 +1,134 @@
+import logging
+
+from base_model_trainer import BaseModelTrainer
+
+from dashboard import generate_regression_explainer_dashboard
+
+from pycaret.regression import RegressionExperiment
+
+from utils import add_hr_to_html, add_plot_to_html
+
+LOG = logging.getLogger(__name__)
+
+
+class RegressionModelTrainer(BaseModelTrainer):
+    def __init__(
+            self,
+            input_file,
+            target_col,
+            output_dir,
+            task_type,
+            random_seed,
+            test_file=None,
+            **kwargs):
+        super().__init__(
+            input_file,
+            target_col,
+            output_dir,
+            task_type,
+            random_seed,
+            test_file,
+            **kwargs)
+        self.exp = RegressionExperiment()
+
+    def save_dashboard(self):
+        LOG.info("Saving explainer dashboard")
+        dashboard = generate_regression_explainer_dashboard(self.exp,
+                                                            self.best_model)
+        dashboard.save_html("dashboard.html")
+
+    def generate_plots(self):
+        LOG.info("Generating and saving plots")
+        plots = ['residuals', 'error', 'cooks',
+                 'learning', 'vc', 'manifold',
+                 'rfe', 'feature', 'feature_all']
+        for plot_name in plots:
+            try:
+                plot_path = self.exp.plot_model(self.best_model,
+                                                plot=plot_name, save=True)
+                self.plots[plot_name] = plot_path
+            except Exception as e:
+                LOG.error(f"Error generating plot {plot_name}: {e}")
+                continue
+
+    def generate_plots_explainer(self):
+        LOG.info("Generating and saving plots from explainer")
+
+        from explainerdashboard import RegressionExplainer
+
+        X_test = self.exp.X_test_transformed.copy()
+        y_test = self.exp.y_test_transformed
+
+        explainer = RegressionExplainer(self.best_model, X_test, y_test)
+        self.expaliner = explainer
+        plots_explainer_html = ""
+
+        try:
+            fig_importance = explainer.plot_importances()
+            plots_explainer_html += add_plot_to_html(fig_importance)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot importance: {e}")
+
+        try:
+            fig_importance_permutation = \
+                explainer.plot_importances_permutation(
+                    kind="permutation")
+            plots_explainer_html += add_plot_to_html(
+                fig_importance_permutation)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot importance permutation: {e}")
+
+        try:
+            for feature in self.features_name:
+                fig_shap = explainer.plot_pdp(feature)
+                plots_explainer_html += add_plot_to_html(fig_shap)
+                plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot shap dependence: {e}")
+
+        # try:
+        #     for feature in self.features_name:
+        #         fig_interaction = explainer.plot_interaction(col=feature)
+        #         plots_explainer_html += add_plot_to_html(fig_interaction)
+        # except Exception as e:
+        #     LOG.error(f"Error generating plot shap interaction: {e}")
+
+        try:
+            for feature in self.features_name:
+                fig_interactions_importance = \
+                    explainer.plot_interactions_importance(
+                        col=feature)
+                plots_explainer_html += add_plot_to_html(
+                    fig_interactions_importance)
+                plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot shap summary: {e}")
+
+        # Regression specific plots
+        try:
+            fig_pred_actual = explainer.plot_predicted_vs_actual()
+            plots_explainer_html += add_plot_to_html(fig_pred_actual)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot prediction vs actual: {e}")
+
+        try:
+            fig_residuals = explainer.plot_residuals()
+            plots_explainer_html += add_plot_to_html(fig_residuals)
+            plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot residuals: {e}")
+
+        try:
+            for feature in self.features_name:
+                fig_residuals_vs_feature = \
+                    explainer.plot_residuals_vs_feature(feature)
+                plots_explainer_html += add_plot_to_html(
+                    fig_residuals_vs_feature)
+                plots_explainer_html += add_hr_to_html()
+        except Exception as e:
+            LOG.error(f"Error generating plot residuals vs feature: {e}")
+
+        self.plots_explainer_html = plots_explainer_html

diff -r 000000000000 -r 915447b14520 pycaret_train.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_train.py Wed Dec 11 05:00:00 2024 +0000

[

@@ -0,0 +1,117 @@
+import argparse
+import logging
+
+from pycaret_classification import ClassificationModelTrainer
+
+from pycaret_regression import RegressionModelTrainer
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_file", help="Path to the input file")
+    parser.add_argument("--target_col", help="Column number of the target")
+    parser.add_argument("--output_dir",
+                        help="Path to the output directory")
+    parser.add_argument("--model_type",
+                        choices=["classification", "regression"],
+                        help="Type of the model")
+    parser.add_argument("--train_size", type=float,
+                        default=None,
+                        help="Train size for PyCaret setup")
+    parser.add_argument("--normalize", action="store_true",
+                        default=None,
+                        help="Normalize data for PyCaret setup")
+    parser.add_argument("--feature_selection", action="store_true",
+                        default=None,
+                        help="Perform feature selection for PyCaret setup")
+    parser.add_argument("--cross_validation", action="store_true",
+                        default=None,
+                        help="Perform cross-validation for PyCaret setup")
+    parser.add_argument("--cross_validation_folds", type=int,
+                        default=None,
+                        help="Number of cross-validation folds \
+                          for PyCaret setup")
+    parser.add_argument("--remove_outliers", action="store_true",
+                        default=None,
+                        help="Remove outliers for PyCaret setup")
+    parser.add_argument("--remove_multicollinearity", action="store_true",
+                        default=None,
+                        help="Remove multicollinearity for PyCaret setup")
+    parser.add_argument("--polynomial_features", action="store_true",
+                        default=None,
+                        help="Generate polynomial features for PyCaret setup")
+    parser.add_argument("--feature_interaction", action="store_true",
+                        default=None,
+                        help="Generate feature interactions for PyCaret setup")
+    parser.add_argument("--feature_ratio", action="store_true",
+                        default=None,
+                        help="Generate feature ratios for PyCaret setup")
+    parser.add_argument("--fix_imbalance", action="store_true",
+                        default=None,
+                        help="Fix class imbalance for PyCaret setup")
+    parser.add_argument("--models", nargs='+',
+                        default=None,
+                        help="Selected models for training")
+    parser.add_argument("--random_seed", type=int,
+                        default=42,
+                        help="Random seed for PyCaret setup")
+    parser.add_argument("--test_file", type=str, default=None,
+                        help="Path to the test data file")
+
+    args = parser.parse_args()
+
+    model_kwargs = {
+        "train_size": args.train_size,
+        "normalize": args.normalize,
+        "feature_selection": args.feature_selection,
+        "cross_validation": args.cross_validation,
+        "cross_validation_folds": args.cross_validation_folds,
+        "remove_outliers": args.remove_outliers,
+        "remove_multicollinearity": args.remove_multicollinearity,
+        "polynomial_features": args.polynomial_features,
+        "feature_interaction": args.feature_interaction,
+        "feature_ratio": args.feature_ratio,
+        "fix_imbalance": args.fix_imbalance,
+    }
+    LOG.info(f"Model kwargs: {model_kwargs}")
+
+    # Remove None values from model_kwargs
+
+    LOG.info(f"Model kwargs 2: {model_kwargs}")
+    if args.models:
+        model_kwargs["models"] = args.models[0].split(",")
+
+    model_kwargs = {k: v for k, v in model_kwargs.items() if v is not None}
+
+    if args.model_type == "classification":
+        trainer = ClassificationModelTrainer(
+            args.input_file,
+            args.target_col,
+            args.output_dir,
+            args.model_type,
+            args.random_seed,
+            args.test_file,
+            **model_kwargs)
+    elif args.model_type == "regression":
+        if "fix_imbalance" in model_kwargs:
+            del model_kwargs["fix_imbalance"]
+        trainer = RegressionModelTrainer(
+            args.input_file,
+            args.target_col,
+            args.output_dir,
+            args.model_type,
+            args.random_seed,
+            args.test_file,
+            **model_kwargs)
+    else:
+        LOG.error("Invalid model type. Please choose \
+                  'classification' or 'regression'.")
+        return
+    trainer.run()
+
+
+if __name__ == "__main__":
+    main()

diff -r 000000000000 -r 915447b14520 pycaret_train.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pycaret_train.xml Wed Dec 11 05:00:00 2024 +0000

[

b'@@ -0,0 +1,209 @@\n+<tool id="pycaret_compare" name="PyCaret Model Comparison" version="@VERSION@" profile="@PROFILE@">\n+ <description>compares different machine learning models on a dataset using PyCaret. Do feature analyses using Random Forest and LightGBM. </description>\n+ <macros>\n+ <import>pycaret_macros.xml</import>\n+ </macros>\n+ <expand macro="python_requirements" />\n+ <command>\n+ <![CDATA[\n+ python $__tool_directory__/pycaret_train.py --input_file $input_file --target_col $target_feature --output_dir "`pwd`" --random_seed $random_seed\n+ #if $model_type == "classification"\n+ #if $classification_models\n+ --models $classification_models\n+ #end if\n+ #end if\n+ #if $model_type == "regression"\n+ #if $regression_models\n+ --models $regression_models\n+ #end if\n+ #end if\n+ #if $customize_defaults == "true"\n+ #if $train_size\n+ --train_size $train_size \n+ #end if\n+ #if $normalize\n+ --normalize \n+ #end if\n+ #if $feature_selection\n+ --feature_selection\n+ #end if\n+ #if $enable_cross_validation == "true" \n+ --cross_validation \n+ #end if\n+ #if $cross_validation_folds\n+ --cross_validation_folds $cross_validation_folds \n+ #end if\n+ #if $remove_outliers\n+ --remove_outliers \n+ #end if\n+ #if $remove_multicollinearity\n+ --remove_multicollinearity \n+ #end if\n+ #if $polynomial_features\n+ --polynomial_features \n+ #end if\n+ #if $fix_imbalance\n+ --fix_imbalance \n+ #end if\n+ #end if\n+ #if $test_file\n+ --test_file $test_file \n+ #end if \n+ --model_type $model_type \n+ ]]>\n+ </command>\n+ <inputs>\n+ <param name="input_file" type="data" format="csv,tabular" label="Train Dataset (CSV or TSV)" />\n+ <param name="test_file" type="data" format="csv,tabular" optional="true" label="Test Dataset (CSV or TSV)"\n+ help="If a test set is not provided, \n+ the selected training set will be split into training, validation, and test sets. \n+ If a test set is provided, the training set will only be split into training and validation sets. 
\n+ BTW, cross-validation is always applied by default." />\n+ <param name="target_feature" multiple="false" type="data_column" use_header_names="true" data_ref="input_file" label="Select the target column:" />\n+ <conditional name="model_selection">\n+ <param name="model_type" type="select" label="Task">\n+ <option value="classification">classification</option>\n+ <option value="regression">regression</option>\n+ </param>\n+ <when value="classification">\n+ <param name="classification_models" type="select" multiple="true" label="Only Select Classification Models if you don\'t want to compare all models">\n+ <option value="lr">Logistic Regression</option>\n+ <option value="knn">K Neighbors Classifier</option>\n+ <option value="nb">Naive Bayes</option>\n+ <option value="dt">Decision Tree Classifier</option>\n+ <option value="svm">SVM - Linear Kernel</option>\n+ <option value="rbfsvm">SVM - Radial Kernel</option>\n+ <option value="gpc">Gaussian Process Classifier</option>\n+ <option value="mlp">MLP Classifier</option>\n+ <option value="ridge">Ridge Classifier</option>\n+ <option value="rf">Random Forest Classifier</option>\n+ <'..b'ed="false" label="Polynomial Features" help="Whether to create polynomial features before training. Default: False" />\n+ <param name="fix_imbalance" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Fix Imbalance" help="ONLY for classfication! Whether to use SMOTE or similar methods to fix imbalance in the dataset. 
Default: False" />\n+ </when>\n+ <when value="false">\n+ \n+ </when>\n+ </conditional>\n+ </inputs>\n+ <outputs>\n+ <data name="model" format="h5" from_work_dir="pycaret_model.h5" label="${tool.name} best model on ${on_string}" />\n+ <data name="comparison_result" format="html" from_work_dir="comparison_result.html" label="${tool.name} Comparison result on ${on_string}"/>\n+ <data name="best_model_csv" format="csv" from_work_dir="best_model.csv" label="${tool.name} The prams of the best model on ${on_string}" hidden="true" />\n+ </outputs>\n+ <tests>\n+ <test>\n+ <param name="input_file" value="pcr.tsv"/>\n+ <param name="target_feature" value="11"/> \n+ <param name="model_type" value="classification"/>\n+ <param name="random_seed" value="42"/>\n+ <param name="customize_defaults" value="true"/>\n+ <param name="train_size" value="0.8"/>\n+ <param name="normalize" value="true"/>\n+ <param name="feature_selection" value="true"/>\n+ <param name="enable_cross_validation" value="true"/>\n+ <param name="cross_validation_folds" value="5"/>\n+ <param name="remove_outliers" value="true"/>\n+ <param name="remove_multicollinearity" value="true"/>\n+ <output name="model" file="expected_model_classification_customized.h5" compare="sim_size"/>\n+ <output name="comparison_result" file="expected_comparison_result_classification_customized.html" compare="sim_size" /> \n+ <output name="best_model_csv" value="expected_best_model_classification_customized.csv" />\n+ </test>\n+ <test>\n+ <param name="input_file" value="pcr.tsv"/>\n+ <param name="target_feature" value="11"/> \n+ <param name="model_type" value="classification"/>\n+ <param name="random_seed" value="42"/>\n+ <output name="model" file="expected_model_classification.h5" compare="sim_size"/>\n+ <output name="comparison_result" file="expected_comparison_result_classification.html" compare="sim_size" /> \n+ <output name="best_model_csv" value="expected_best_model_classification.csv" />\n+ </test>\n+ <test>\n+ <param 
name="input_file" value="auto-mpg.tsv"/>\n+ <param name="target_feature" value="1"/> \n+ <param name="model_type" value="regression"/>\n+ <param name="random_seed" value="42"/>\n+ <output name="model" file="expected_model_regression.h5" compare="sim_size" />\n+ <output name="comparison_result" file="expected_comparison_result_regression.html" compare="sim_size" /> \n+ <output name="best_model_csv" value="expected_best_model_regression.csv" />\n+ </test>\n+ </tests>\n+ <help>\n+ This tool uses PyCaret to train and evaluate machine learning models.\n+ It compares different models on a dataset and provides the best model based on the performance metrics.\n+\n+ **Outputs**\n+\n+ - **Model**: The best model trained on the dataset in h5 format.\n+\n+\n+ - **Comparison Result**: The comparison result of different models in html format. \n+ It contains the performance metrics of different models, plots of the best model \n+ on the testing set (or part of the training set if a separate test set is not uploaded), and feature analysis plots.\n+\n+ </help>\n+ <expand macro="macro_citations" />\n+</tool>\n\\ No newline at end of file\n'

diff -r 000000000000 -r 915447b14520 test-data/auto-mpg.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/auto-mpg.tsv Wed Dec 11 05:00:00 2024 +0000

b'@@ -0,0 +1,399 @@\n+MPG\tCylinders\tDisplacement\tHorsepower\tWeight\tAcceleration\tModelYear\tOrigin\n+18.0\t8\t307.0\t130.0\t3504.\t12.0\t70\t1\n+15.0\t8\t350.0\t165.0\t3693.\t11.5\t70\t1\n+18.0\t8\t318.0\t150.0\t3436.\t11.0\t70\t1\n+16.0\t8\t304.0\t150.0\t3433.\t12.0\t70\t1\n+17.0\t8\t302.0\t140.0\t3449.\t10.5\t70\t1\n+15.0\t8\t429.0\t198.0\t4341.\t10.0\t70\t1\n+14.0\t8\t454.0\t220.0\t4354.\t9.0\t70\t1\n+14.0\t8\t440.0\t215.0\t4312.\t8.5\t70\t1\n+14.0\t8\t455.0\t225.0\t4425.\t10.0\t70\t1\n+15.0\t8\t390.0\t190.0\t3850.\t8.5\t70\t1\n+15.0\t8\t383.0\t170.0\t3563.\t10.0\t70\t1\n+14.0\t8\t340.0\t160.0\t3609.\t8.0\t70\t1\n+15.0\t8\t400.0\t150.0\t3761.\t9.5\t70\t1\n+14.0\t8\t455.0\t225.0\t3086.\t10.0\t70\t1\n+24.0\t4\t113.0\t95.00\t2372.\t15.0\t70\t3\n+22.0\t6\t198.0\t95.00\t2833.\t15.5\t70\t1\n+18.0\t6\t199.0\t97.00\t2774.\t15.5\t70\t1\n+21.0\t6\t200.0\t85.00\t2587.\t16.0\t70\t1\n+27.0\t4\t97.00\t88.00\t2130.\t14.5\t70\t3\n+26.0\t4\t97.00\t46.00\t1835.\t20.5\t70\t2\n+25.0\t4\t110.0\t87.00\t2672.\t17.5\t70\t2\n+24.0\t4\t107.0\t90.00\t2430.\t14.5\t70\t2\n+25.0\t4\t104.0\t95.00\t2375.\t17.5\t70\t2\n+26.0\t4\t121.0\t113.0\t2234.\t12.5\t70\t2\n+21.0\t6\t199.0\t90.00\t2648.\t15.0\t70\t1\n+10.0\t8\t360.0\t215.0\t4615.\t14.0\t70\t1\n+10.0\t8\t307.0\t200.0\t4376.\t15.0\t70\t1\n+11.0\t8\t318.0\t210.0\t4382.\t13.5\t70\t1\n+9.0\t8\t304.0\t193.0\t4732.\t18.5\t70\t1\n+27.0\t4\t97.00\t88.00\t2130.\t14.5\t71\t3\n+28.0\t4\t140.0\t90.00\t2264.\t15.5\t71\t1\n+25.0\t4\t113.0\t95.00\t2228.\t14.0\t71\t3\n+25.0\t4\t98.00\t?\t2046.\t19.0\t71\t1\n+19.0\t6\t232.0\t100.0\t2634.\t13.0\t71\t1\n+16.0\t6\t225.0\t105.0\t3439.\t15.5\t71\t1\n+17.0\t6\t250.0\t100.0\t3329.\t15.5\t71\t1\n+19.0\t6\t250.0\t88.00\t3302.\t15.5\t71\t1\n+18.0\t6\t232.0\t100.0\t3288.\t15.5\t71\t1\n+14.0\t8\t350.0\t165.0\t4209.\t12.0\t71\t1\n+14.0\t8\t400.0\t175.0\t4464.\t11.5\t71\t1\n+14.0\t8\t351.0\t153.0\t4154.\t13.5\t71\t1\n+14.0\t8\t318.0\t150.0\t4096.\t13.0\t71\t1\n+12.0\t8\t383.0\t180.0\t4955.\t11.5\t71\t1\n+13.0\t8\t400
.0\t170.0\t4746.\t12.0\t71\t1\n+13.0\t8\t400.0\t175.0\t5140.\t12.0\t71\t1\n+18.0\t6\t258.0\t110.0\t2962.\t13.5\t71\t1\n+22.0\t4\t140.0\t72.00\t2408.\t19.0\t71\t1\n+19.0\t6\t250.0\t100.0\t3282.\t15.0\t71\t1\n+18.0\t6\t250.0\t88.00\t3139.\t14.5\t71\t1\n+23.0\t4\t122.0\t86.00\t2220.\t14.0\t71\t1\n+28.0\t4\t116.0\t90.00\t2123.\t14.0\t71\t2\n+30.0\t4\t79.00\t70.00\t2074.\t19.5\t71\t2\n+30.0\t4\t88.00\t76.00\t2065.\t14.5\t71\t2\n+31.0\t4\t71.00\t65.00\t1773.\t19.0\t71\t3\n+35.0\t4\t72.00\t69.00\t1613.\t18.0\t71\t3\n+27.0\t4\t97.00\t60.00\t1834.\t19.0\t71\t2\n+26.0\t4\t91.00\t70.00\t1955.\t20.5\t71\t1\n+24.0\t4\t113.0\t95.00\t2278.\t15.5\t72\t3\n+25.0\t4\t97.50\t80.00\t2126.\t17.0\t72\t1\n+23.0\t4\t97.00\t54.00\t2254.\t23.5\t72\t2\n+20.0\t4\t140.0\t90.00\t2408.\t19.5\t72\t1\n+21.0\t4\t122.0\t86.00\t2226.\t16.5\t72\t1\n+13.0\t8\t350.0\t165.0\t4274.\t12.0\t72\t1\n+14.0\t8\t400.0\t175.0\t4385.\t12.0\t72\t1\n+15.0\t8\t318.0\t150.0\t4135.\t13.5\t72\t1\n+14.0\t8\t351.0\t153.0\t4129.\t13.0\t72\t1\n+17.0\t8\t304.0\t150.0\t3672.\t11.5\t72\t1\n+11.0\t8\t429.0\t208.0\t4633.\t11.0\t72\t1\n+13.0\t8\t350.0\t155.0\t4502.\t13.5\t72\t1\n+12.0\t8\t350.0\t160.0\t4456.\t13.5\t72\t1\n+13.0\t8\t400.0\t190.0\t4422.\t12.5\t72\t1\n+19.0\t3\t70.00\t97.00\t2330.\t13.5\t72\t3\n+15.0\t8\t304.0\t150.0\t3892.\t12.5\t72\t1\n+13.0\t8\t307.0\t130.0\t4098.\t14.0\t72\t1\n+13.0\t8\t302.0\t140.0\t4294.\t16.0\t72\t1\n+14.0\t8\t318.0\t150.0\t4077.\t14.0\t72\t1\n+18.0\t4\t121.0\t112.0\t2933.\t14.5\t72\t2\n+22.0\t4\t121.0\t76.00\t2511.\t18.0\t72\t2\n+21.0\t4\t120.0\t87.00\t2979.\t19.5\t72\t2\n+26.0\t4\t96.00\t69.00\t2189.\t18.0\t72\t2\n+22.0\t4\t122.0\t86.00\t2395.\t16.0\t72\t1\n+28.0\t4\t97.00\t92.00\t2288.\t17.0\t72\t3\n+23.0\t4\t120.0\t97.00\t2506.\t14.5\t72\t3\n+28.0\t4\t98.00\t80.00\t2164.\t15.0\t72\t1\n+27.0\t4\t97.00\t88.00\t2100.\t16.5\t72\t3\n+13.0\t8\t350.0\t175.0\t4100.\t13.0\t73\t1\n+14.0\t8\t304.0\t150.0\t3672.\t11.5\t73\t1\n+13.0\t8\t350.0\t145.0\t3988.\t13.0\t73\t1\n+14.0\t8\t302.0\t137.0\t4042.\t14
.5\t73\t1\n+15.0\t8\t318.0\t150.0\t3777.\t12.5\t73\t1\n+12.0\t8\t429.0\t198.0\t4952.\t11.5\t73\t1\n+13.0\t8\t400.0\t150.0\t4464.\t12.0\t73\t1\n+13.0\t8\t351.0\t158.0\t4363.\t13.0\t73\t1\n+14.0\t8\t318.0\t150.0\t4237.\t14.5\t73\t1\n+13.0\t8\t440.0\t215.0\t4735.\t11.0\t73\t1\n+12.0\t8\t455.0\t225.0\t4951.\t11.0\t73\t1\n+13.0\t8\t360.0\t175.0\t3821.\t11.0\t73\t1\n+18.0\t6\t225.0\t105.0\t3121.\t16.5\t73\t1\n+16.0\t6\t250.0\t100.0\t3278.\t18.0\t73\t1\n+18.0\t6\t232.0\t100.0\t2945.\t16.0\t73\t1\n+18.0\t6\t250.0\t88.00\t3021.\t16.5\t73\t1\n+23.0\t6\t198.0\t95.00\t2904.\t16.0\t73\t1\n+26.0\t4\t97.00\t46.00\t1950.\t21.0\t73\t2\n+11.0\t8\t400.0\t150.0\t4997.\t14.0\t73\t1\n+12.0\t8\t400.0\t167.0\t4906.\t12.5\t73\t1\n+13.0\t8\t360.0\t170.0\t4654.\t13.0\t73\t1\n+12.0\t8\t350.0\t180.0\t4499.\t12.5\t73\t1\n+18.0\t6\t232.0\t100.0\t2789.\t15.0\t73\t1\n+20.0\t4\t97.00\t88.00\t2279.\t1'..b'29.0\t3725.\t13.4\t79\t1\n+16.5\t8\t351.0\t138.0\t3955.\t13.2\t79\t1\n+18.2\t8\t318.0\t135.0\t3830.\t15.2\t79\t1\n+16.9\t8\t350.0\t155.0\t4360.\t14.9\t79\t1\n+15.5\t8\t351.0\t142.0\t4054.\t14.3\t79\t1\n+19.2\t8\t267.0\t125.0\t3605.\t15.0\t79\t1\n+18.5\t8\t360.0\t150.0\t3940.\t13.0\t79\t1\n+31.9\t4\t89.00\t71.00\t1925.\t14.0\t79\t2\n+34.1\t4\t86.00\t65.00\t1975.\t15.2\t79\t3\n+35.7\t4\t98.00\t80.00\t1915.\t14.4\t79\t1\n+27.4\t4\t121.0\t80.00\t2670.\t15.0\t79\t1\n+25.4\t5\t183.0\t77.00\t3530.\t20.1\t79\t2\n+23.0\t8\t350.0\t125.0\t3900.\t17.4\t79\t1\n+27.2\t4\t141.0\t71.00\t3190.\t24.8\t79\t2\n+23.9\t8\t260.0\t90.00\t3420.\t22.2\t79\t1\n+34.2\t4\t105.0\t70.00\t2200.\t13.2\t79\t1\n+34.5\t4\t105.0\t70.00\t2150.\t14.9\t79\t1\n+31.8\t4\t85.00\t65.00\t2020.\t19.2\t79\t3\n+37.3\t4\t91.00\t69.00\t2130.\t14.7\t79\t2\n+28.4\t4\t151.0\t90.00\t2670.\t16.0\t79\t1\n+28.8\t6\t173.0\t115.0\t2595.\t11.3\t79\t1\n+26.8\t6\t173.0\t115.0\t2700.\t12.9\t79\t1\n+33.5\t4\t151.0\t90.00\t2556.\t13.2\t79\t1\n+41.5\t4\t98.00\t76.00\t2144.\t14.7\t80\t2\n+38.1\t4\t89.00\t60.00\t1968.\t18.8\t80\t3\n+32.1\t4\t98.00\t70.00\t2120.\t15.
5\t80\t1\n+37.2\t4\t86.00\t65.00\t2019.\t16.4\t80\t3\n+28.0\t4\t151.0\t90.00\t2678.\t16.5\t80\t1\n+26.4\t4\t140.0\t88.00\t2870.\t18.1\t80\t1\n+24.3\t4\t151.0\t90.00\t3003.\t20.1\t80\t1\n+19.1\t6\t225.0\t90.00\t3381.\t18.7\t80\t1\n+34.3\t4\t97.00\t78.00\t2188.\t15.8\t80\t2\n+29.8\t4\t134.0\t90.00\t2711.\t15.5\t80\t3\n+31.3\t4\t120.0\t75.00\t2542.\t17.5\t80\t3\n+37.0\t4\t119.0\t92.00\t2434.\t15.0\t80\t3\n+32.2\t4\t108.0\t75.00\t2265.\t15.2\t80\t3\n+46.6\t4\t86.00\t65.00\t2110.\t17.9\t80\t3\n+27.9\t4\t156.0\t105.0\t2800.\t14.4\t80\t1\n+40.8\t4\t85.00\t65.00\t2110.\t19.2\t80\t3\n+44.3\t4\t90.00\t48.00\t2085.\t21.7\t80\t2\n+43.4\t4\t90.00\t48.00\t2335.\t23.7\t80\t2\n+36.4\t5\t121.0\t67.00\t2950.\t19.9\t80\t2\n+30.0\t4\t146.0\t67.00\t3250.\t21.8\t80\t2\n+44.6\t4\t91.00\t67.00\t1850.\t13.8\t80\t3\n+40.9\t4\t85.00\t?\t1835.\t17.3\t80\t2\n+33.8\t4\t97.00\t67.00\t2145.\t18.0\t80\t3\n+29.8\t4\t89.00\t62.00\t1845.\t15.3\t80\t2\n+32.7\t6\t168.0\t132.0\t2910.\t11.4\t80\t3\n+23.7\t3\t70.00\t100.0\t2420.\t12.5\t80\t3\n+35.0\t4\t122.0\t88.00\t2500.\t15.1\t80\t2\n+23.6\t4\t140.0\t?\t2905.\t14.3\t80\t1\n+32.4\t4\t107.0\t72.00\t2290.\t17.0\t80\t3\n+27.2\t4\t135.0\t84.00\t2490.\t15.7\t81\t1\n+26.6\t4\t151.0\t84.00\t2635.\t16.4\t81\t1\n+25.8\t4\t156.0\t92.00\t2620.\t14.4\t81\t1\n+23.5\t6\t173.0\t110.0\t2725.\t12.6\t81\t1\n+30.0\t4\t135.0\t84.00\t2385.\t12.9\t81\t1\n+39.1\t4\t79.00\t58.00\t1755.\t16.9\t81\t3\n+39.0\t4\t86.00\t64.00\t1875.\t16.4\t81\t1\n+35.1\t4\t81.00\t60.00\t1760.\t16.1\t81\t3\n+32.3\t4\t97.00\t67.00\t2065.\t17.8\t81\t3\n+37.0\t4\t85.00\t65.00\t1975.\t19.4\t81\t3\n+37.7\t4\t89.00\t62.00\t2050.\t17.3\t81\t3\n+34.1\t4\t91.00\t68.00\t1985.\t16.0\t81\t3\n+34.7\t4\t105.0\t63.00\t2215.\t14.9\t81\t1\n+34.4\t4\t98.00\t65.00\t2045.\t16.2\t81\t1\n+29.9\t4\t98.00\t65.00\t2380.\t20.7\t81\t1\n+33.0\t4\t105.0\t74.00\t2190.\t14.2\t81\t2\n+34.5\t4\t100.0\t?\t2320.\t15.8\t81\t2\n+33.7\t4\t107.0\t75.00\t2210.\t14.4\t81\t3\n+32.4\t4\t108.0\t75.00\t2350.\t16.8\t81\t3\n+32.9\t4\t119.0\t100.0
\t2615.\t14.8\t81\t3\n+31.6\t4\t120.0\t74.00\t2635.\t18.3\t81\t3\n+28.1\t4\t141.0\t80.00\t3230.\t20.4\t81\t2\n+30.7\t6\t145.0\t76.00\t3160.\t19.6\t81\t2\n+25.4\t6\t168.0\t116.0\t2900.\t12.6\t81\t3\n+24.2\t6\t146.0\t120.0\t2930.\t13.8\t81\t3\n+22.4\t6\t231.0\t110.0\t3415.\t15.8\t81\t1\n+26.6\t8\t350.0\t105.0\t3725.\t19.0\t81\t1\n+20.2\t6\t200.0\t88.00\t3060.\t17.1\t81\t1\n+17.6\t6\t225.0\t85.00\t3465.\t16.6\t81\t1\n+28.0\t4\t112.0\t88.00\t2605.\t19.6\t82\t1\n+27.0\t4\t112.0\t88.00\t2640.\t18.6\t82\t1\n+34.0\t4\t112.0\t88.00\t2395.\t18.0\t82\t1\n+31.0\t4\t112.0\t85.00\t2575.\t16.2\t82\t1\n+29.0\t4\t135.0\t84.00\t2525.\t16.0\t82\t1\n+27.0\t4\t151.0\t90.00\t2735.\t18.0\t82\t1\n+24.0\t4\t140.0\t92.00\t2865.\t16.4\t82\t1\n+23.0\t4\t151.0\t?\t3035.\t20.5\t82\t1\n+36.0\t4\t105.0\t74.00\t1980.\t15.3\t82\t2\n+37.0\t4\t91.00\t68.00\t2025.\t18.2\t82\t3\n+31.0\t4\t91.00\t68.00\t1970.\t17.6\t82\t3\n+38.0\t4\t105.0\t63.00\t2125.\t14.7\t82\t1\n+36.0\t4\t98.00\t70.00\t2125.\t17.3\t82\t1\n+36.0\t4\t120.0\t88.00\t2160.\t14.5\t82\t3\n+36.0\t4\t107.0\t75.00\t2205.\t14.5\t82\t3\n+34.0\t4\t108.0\t70.00\t2245\t16.9\t82\t3\n+38.0\t4\t91.00\t67.00\t1965.\t15.0\t82\t3\n+32.0\t4\t91.00\t67.00\t1965.\t15.7\t82\t3\n+38.0\t4\t91.00\t67.00\t1995.\t16.2\t82\t3\n+25.0\t6\t181.0\t110.0\t2945.\t16.4\t82\t1\n+38.0\t6\t262.0\t85.00\t3015.\t17.0\t82\t1\n+26.0\t4\t156.0\t92.00\t2585.\t14.5\t82\t1\n+22.0\t6\t232.0\t112.0\t2835\t14.7\t82\t1\n+32.0\t4\t144.0\t96.00\t2665.\t13.9\t82\t3\n+36.0\t4\t135.0\t84.00\t2370.\t13.0\t82\t1\n+27.0\t4\t151.0\t90.00\t2950.\t17.3\t82\t1\n+27.0\t4\t140.0\t86.00\t2790.\t15.6\t82\t1\n+44.0\t4\t97.00\t52.00\t2130.\t24.6\t82\t2\n+32.0\t4\t135.0\t84.00\t2295.\t11.6\t82\t1\n+28.0\t4\t120.0\t79.00\t2625.\t18.6\t82\t1\n+31.0\t4\t119.0\t82.00\t2720.\t19.4\t82\t1\n'

diff -r 000000000000 -r 915447b14520 test-data/evaluation_report_classification.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/evaluation_report_classification.html Wed Dec 11 05:00:00 2024 +0000

[

b'@@ -0,0 +1,236 @@\n+\n+ \n+ <html>\n+ <head>\n+ <title>Model Training Report</title>\n+ <style>\n+ body {\n+ font-family: Arial, sans-serif;\n+ margin: 0;\n+ padding: 20px;\n+ background-color: #f4f4f4;\n+ }\n+ .container {\n+ max-width: 800px;\n+ margin: auto;\n+ background: white;\n+ padding: 20px;\n+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);\n+ }\n+ h1 {\n+ text-align: center;\n+ color: #333;\n+ }\n+ h2 {\n+ border-bottom: 2px solid #4CAF50;\n+ color: #4CAF50;\n+ padding-bottom: 5px;\n+ }\n+ table {\n+ width: 100%;\n+ border-collapse: collapse;\n+ margin: 20px 0;\n+ }\n+ table, th, td {\n+ border: 1px solid #ddd;\n+ }\n+ th, td {\n+ padding: 8px;\n+ text-align: left;\n+ }\n+ th {\n+ background-color: #4CAF50;\n+ color: white;\n+ }\n+ .plot {\n+ text-align: center;\n+ margin: 20px 0;\n+ }\n+ .plot img {\n+ max-width: 100%;\n+ height: auto;\n+ }\n+ .tabs {\n+ display: flex;\n+ margin-bottom: 20px;\n+ cursor: pointer;\n+ justify-content: space-around;\n+ }\n+ .tab {\n+ padding: 10px;\n+ background-color: #4CAF50;\n+ color: white;\n+ border-radius: 5px 5px 0 0;\n+ flex-grow: 1;\n+ text-align: center;\n+ margin: 0 5px;\n+ }\n+ .tab.active-tab {\n+ background-color: #333;\n+ }\n+ .tab-content {\n+ display: none;\n+ padding: 20px;\n+ border: 1px solid #ddd;\n+ border-top: none;\n+ background-color: white;\n+ }\n+ .tab-content.active-content {\n+ display: block;\n+ }\n+ </style>\n+ </head>\n+ <body>\n+ <div class="container">\n+ \n+ <h1>Model Evaluation Report</h1>\n+ <div class="tabs">\n+ <div class="tab" onclick="openTab(event, \'metrics\')">Metrics</div>\n+ <div class="tab" onclick="openTab(event, \'plots\')">Plots</div>\n+ </div>\n+ <div id="metrics" class="tab-content">\n+ <h2>Metrics</h2>\n+ <table>\n+ <table border="1" class="dataframe table">\n+ <thead>\n+ <tr style="text-align: right;">\n+ <th>Model</th>\n+ <th>Accuracy</th>\n+ <th>AUC</th>\n+ <th>Recall</th>\n+ <th>Prec.</th>\n+ <th>F1</th>\n+ <th>Kappa</th>\n+ <th>MCC</th>\n+ <th>PR-AUC-Weighted</th>\n+ 
</tr>\n+ </thead>\n+ <tbody>\n+ <tr>\n+ <td>Light Gradient Boosting Machine</td>\n+ <td>0.7826</td>\n+ <td>0.8162</td>\n+ <td>0.7419</td>\n+ <td>0.7667</td>\n+ <td>0.7541</td>\n+ <td>0.5594</td>\n+ <td>0.5596</td>\n+ <td>0.7753</td>\n+ </tr>\n+ </tbody>\n+</table>\n+ </table>\n+ </div>\n+ <div id="plots" class="tab-content">\n+ <h2>Plots</h2>\n+ \n+ <div class="plot">\n+ <h3>Confusion_matrix</h3>\n+ <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAxAAAAIWCAYAAADH12tUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA9qUlEQVR4nO3de3zP9f//8fs2xmYbJodhc7bFTs7nxhAZpemEHIocFkVJi0/l+JFPfFP2UfKJStIIxYjIITkTNqfIMadl5rxlb/P+/eGz98/a8Bzb3u8Pt+vl4vKx9+v1eu/xfuPT+7bXyclqtVoFAAAAAAac7T0AAAAAgP8dBAQAAAAAYwQEAAAAAGMEBAAAAABjBAQAAAAAYwQEAAAAAGMEBAAAAABjBAQAAAAAYwQEAAAAAGMEBADkEn9/f02ePNlu379bt27q1q1bpseSkpL0yiuvqEGDBvL399fnn3+uTZs2yd/fX5s2bbLTpPkru/cgt9n7z97RREdHKzw83N5jAMgjBASAPDN//nz5+/srISHhjutevnxZH3/8sSIjI1WnTh0FBgaqRYsWGjRokFavXp1p3YwPwDf/ql+/vp555hktXLgwy3OHh'..b'dJ8W7duhZeXFz777DPk5+erPGZaWprG823btg3NmzfHsmXLkJiYiBUrViAnJwerV69W+5o+++wzfP/99wgKCoKXlxdiY2OxYsUK1KlTB2+99VaZe/Tll1+iV69eWLNmDU6cOIGlS5ciNTUVFy5cwIcffojMzEwsXrwYy5cvx6effqr03I8//hj9+vVDWFgYzp07h9WrV8PMzExRx99//40xY8agbdu2WLNmDZKTk7Fy5UrEx8dj9+7d0NXVVcy1atUqdO7cGStXrkRhYSFsbGzw4Ycfom7duggODgYANGrUCEBRcO/atSvGjh0LHR0dnD59Gu+++y62bNmCtm3bKtX4wQcfYNiwYRgzZgz27t2LkJAQeHp6olmzZgCAw4cPY+bMmejevTtWrlwJfX19xMbG4vHjx3B3d0dubi7GjBmDlJQUTJ8+HQ0bNsSBAwfw3nvvKYI+EdUAAhERVZi8vDyhXbt2wty5cxXbrl+/Ljg7OwtRUVFqn3PgwAHB3d1dyMrKUmx3dnYW+vbtKxQWFiqNDw4OFvr161dqDerm69atm5Cfn6/Y9v333wsuLi5CfHy8IAiC8OuvvwrOzs7CX3/9JQiCINy9e1dwcXERdu/erXSM5cuXC35+fkJBQYHaOkJDQ4WWLVsqHt+7d09wdnYWpk2bptiWn58vdOjQQWjZsqWQmpqq2L506VKhdevWisfFdX344YdKx/jwww+Fjh07KuqYNGmS0KVLFyE3N1cx5syZM4Kzs7Pw008/KdUxbty4EjW/8847wrvvvqv2NQmCIBQUFAh5eXnC2LFjhZkzZyq279u3T3B2dha2b9+u2Pb8+XPBy8tLWLdunSAIglBYWCh06tRJGDt2rNr5IyIiBHd3d+HmzZtK24cOHSpMnTq11NqIqPrgchQiogqkp6eHPn364Pjx44rlIIcOHYKhoSF69uwJoGh5
x3fffYe+ffuiRYsWaN68OT744APk5+fj3r17SvN16tQJEomk1GOWZb6uXbsqXQ3u06cPBEFQezeUc+fOAQB69eqF/Px8xVeHDh2QlJSEhw8flq1BAPz8/BTf6+rqwtbWFq6urqhfv75iu4ODA9LT0/H8+XOl5xb3sFjv3r3x+PFjPHr0CABw8eJFdO/eHfr6+oox/v7+MDU1xaVLl5Se26VLF41rfvToEYKDg9GxY0e4u7ujefPmiImJwZ07d0qM9ff3V3wvlUphY2OjqO/27dt49OgRhgwZovZYZ8+ehbOzMxwcHEr0vCLuWkNEVQOXoxARVbD+/ftj586dOHPmDLp3747o6Gh069YNRkZGAIAtW7Zg2bJlGD9+PNq2bQtTU1NcuXIFixYtQk5OjtJcFhYWLz3eq8xnbGyMOnXq4MmTJyrnTktLgyAIaNeuncr9Dx8+ROPGjV9a43+ZmJgoPdbX14dUKi2xDQBycnIUfQMAc3NzpXGWlpYAgKSkJNjY2CA9PV1lzywsLPDs2bMS2zRRWFiI999/HxkZGZg6dSrs7e1haGiI0NBQlT+EqHp9xT+QFa9Nb9CggdrjpaWl4erVq2jevHmJff/9AYqIqjeGcCKiCubj44PGjRvj0KFDsLCwQGJiIubOnavYf/ToUXTr1g2zZs1SbLt165bKuV52Fbys86WkpCg9zszMRE5OjtpQaGZmBolEgp07dypdXS7m6Oj40voqUmpqqtLj5ORkAICVlRWAonpffI1A0es2MzNT2qZJbwHg7t27uHr1KtatW4cePXootpfnjir16tUDALU/9ABFr8HFxQWfffZZmecnouqDy1GIiCqYRCJB//798fPPP2Pv3r2oV68eOnbsqNifnZ1dItC+ygfElGW+X375BQUFBYrHR48ehUQigaenp8rxxW8uffr0KTw9PUt8GRsbl7vu8vjxxx+VHh87dgwNGjRQvIGyVatW+Omnn5TexHr27Fmkp6ejVatWL51fX1+/xG8Pih//t8f379/H5cuXy1x/06ZN0ahRI0RGRqod06FDB9y7dw8NGjRQ2XMiqhl4JZyIqBL0798fX3/9NSIjI/Hmm28qBbgOHTpg69at2L59OxwcHHDgwAHcvXu33Mcqy3y5ubmYNGkS3nrrLcXdUXr37q24c8eLHB0d8fbbb2P27NkYN24cvLy8kJeXB5lMht9++w1fffVVuesuj19//RXLli2Dn58fzp49ix9++AHz58+Hjk7RNaWgoCAMHz4c7733HkaOHKm4O0qLFi3QuXPnl87ftGlT7N+/Hz///DOsrKzQoEEDRXAuvotKVlYWQkNDS11Soo5EIkFwcDBmzpyJKVOm4PXXX4eBgQH++OMPeHp6omvXrhg0aBB2796NwMBAjB07Fg4ODsjIyMDVq1eRl5en9BsPIqq+GMKJiCqBs7MzXFxccP36dQwYMEBp36RJk5CWlobQ0FAARW8unDdvHoKCgsp1rLLMN3LkSKSmpmL27NnIzc1Fz549MX/+/FLnnzdvHhwdHbFnzx6sW7cORkZGcHR0RJ8+fcpV76tYtGgR9uzZg127dsHIyAjTpk3D22+/rdjv4eGBb7/9FqtWrcKUKVMglUrRrVs3BAcHa7SeesKECUhISEBwcDDS09MxefJkTJkyBWFhYVi0aBGmTZsGa2trvP/++/j111/x999/l/k19O3bF3Xr1sWGDRswc+ZM1KlTB+7u7oo3nRoYGGDr1q0ICwvDhg0bkJSUhHr16sHd3R0jRowo8/GIqGqSCIIgaLsIIiKi0vz2228IDAxEREQEl2QQUY3ANeFERERERCJjCCciIiIiEhmXoxARERERiYxXwomIiIiIRMYQTkREREQkMoZwIiIiIiKRMYQTEREREYmMIZyIiIiISGQM4UREREREImMIJyIiIiISGUM4EREREZHI/g9UF7Glu/DZLAAAAABJRU5ErkJggg==" alt="feature_all">\n+ 
</div>\n+ <hr>\n+ \n+ </div>\n+ \n+ </div>\n+ <script>\n+ function openTab(evt, tabName) {{\n+ var i, tabcontent, tablinks;\n+ tabcontent = document.getElementsByClassName("tab-content");\n+ for (i = 0; i < tabcontent.length; i++) {{\n+ tabcontent[i].style.display = "none";\n+ }}\n+ tablinks = document.getElementsByClassName("tab");\n+ for (i = 0; i < tablinks.length; i++) {{\n+ tablinks[i].className =\n+ tablinks[i].className.replace(" active-tab", "");\n+ }}\n+ document.getElementById(tabName).style.display = "block";\n+ evt.currentTarget.className += " active-tab";\n+ }}\n+ document.addEventListener("DOMContentLoaded", function() {{\n+ document.querySelector(".tab").click();\n+ }});\n+ </script>\n+ </body>\n+ </html>\n+ \n+ \n\\ No newline at end of file\n'

diff -r 000000000000 -r 915447b14520 test-data/evaluation_report_regression.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/evaluation_report_regression.html Wed Dec 11 05:00:00 2024 +0000

[

b'@@ -0,0 +1,202 @@\n+\n+ \n+ <html>\n+ <head>\n+ <title>Model Training Report</title>\n+ <style>\n+ body {\n+ font-family: Arial, sans-serif;\n+ margin: 0;\n+ padding: 20px;\n+ background-color: #f4f4f4;\n+ }\n+ .container {\n+ max-width: 800px;\n+ margin: auto;\n+ background: white;\n+ padding: 20px;\n+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);\n+ }\n+ h1 {\n+ text-align: center;\n+ color: #333;\n+ }\n+ h2 {\n+ border-bottom: 2px solid #4CAF50;\n+ color: #4CAF50;\n+ padding-bottom: 5px;\n+ }\n+ table {\n+ width: 100%;\n+ border-collapse: collapse;\n+ margin: 20px 0;\n+ }\n+ table, th, td {\n+ border: 1px solid #ddd;\n+ }\n+ th, td {\n+ padding: 8px;\n+ text-align: left;\n+ }\n+ th {\n+ background-color: #4CAF50;\n+ color: white;\n+ }\n+ .plot {\n+ text-align: center;\n+ margin: 20px 0;\n+ }\n+ .plot img {\n+ max-width: 100%;\n+ height: auto;\n+ }\n+ .tabs {\n+ display: flex;\n+ margin-bottom: 20px;\n+ cursor: pointer;\n+ justify-content: space-around;\n+ }\n+ .tab {\n+ padding: 10px;\n+ background-color: #4CAF50;\n+ color: white;\n+ border-radius: 5px 5px 0 0;\n+ flex-grow: 1;\n+ text-align: center;\n+ margin: 0 5px;\n+ }\n+ .tab.active-tab {\n+ background-color: #333;\n+ }\n+ .tab-content {\n+ display: none;\n+ padding: 20px;\n+ border: 1px solid #ddd;\n+ border-top: none;\n+ background-color: white;\n+ }\n+ .tab-content.active-content {\n+ display: block;\n+ }\n+ </style>\n+ </head>\n+ <body>\n+ <div class="container">\n+ \n+ <h1>Model Evaluation Report</h1>\n+ <div class="tabs">\n+ <div class="tab" onclick="openTab(event, \'metrics\')">Metrics</div>\n+ <div class="tab" onclick="openTab(event, \'plots\')">Plots</div>\n+ </div>\n+ <div id="metrics" class="tab-content">\n+ <h2>Metrics</h2>\n+ <table>\n+ <table border="1" class="dataframe table">\n+ <thead>\n+ <tr style="text-align: right;">\n+ <th>Model</th>\n+ <th>MAE</th>\n+ <th>MSE</th>\n+ <th>RMSE</th>\n+ <th>R2</th>\n+ <th>RMSLE</th>\n+ <th>MAPE</th>\n+ </tr>\n+ </thead>\n+ <tbody>\n+ <tr>\n+ <td>Gradient 
Boosting Regressor</td>\n+ <td>1.6</td>\n+ <td>5.6214</td>\n+ <td>2.3709</td>\n+ <td>0.9077</td>\n+ <td>0.0875</td>\n+ <td>0.0691</td>\n+ </tr>\n+ </tbody>\n+</table>\n+ </table>\n+ </div>\n+ <div id="plots" class="tab-content">\n+ <h2>Plots</h2>\n+ \n+ <div class="plot">\n+ <h3>Residuals</h3>\n+ <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAvwAAAH+CAYAAADgeNoZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD6wklEQVR4nOydd5xU1fn/3/feabv0pSlNUVxEAQEFBREiKhbQxAKiFLtRsWCLJSb5YvLThMRYQNHYFQSMaAJSxBJFsYCKioAiSEd0YYFddnfKvff8/rgzszM7s7szu7P9eb9evmTv3HLOuXfmfs5znqIppRSCIAiCIAiCIDRK9LpugCAIgiAIgiAINYcIfkEQBEEQBEFoxIjgFwRBEARBEIRGjAh+QRAEQRAEQWjEiOAXBEEQBEEQhEaMCH5BEARBEARBaMSI4BcEQRAEQRCERowIfkEQBEEQBEFoxIjgFwRBEARBEIRGjAh+ocEwffp0evbsmdK+PXv2ZPr06TXanokTJzJx4sSMnGvPnj3cfPPNnHjiifTs2ZMXXnghI+dtCHz22Wf07NmTzz77LLrt7rvvZsSIEXXYqobLiBEjuPvuu+u6GUIDpTq/a/LsCUL9xVXXDRAaHq+//jr33HNP9G/DMGjbti0nn3wyt956Kx07dqzD1jVMHnzwQT788ENuvPFG2rVrR+/evWv8msFgkHnz5rF48WI2btxISUkJrVu3pnfv3'..b'qXRuRdqrbtOe+41NDY2tpyqwMwuArYnRiY/QEzbuYG7P2lm+xArMn69riUVEREREZFSyg7k3RE42t1HEjP25I0BVmlDmUREREREpI7KBv1LUH0wwuIl8xQRERERkXZQNuh/lpims5JtiQEOIiIiIiLSCZSdvecU4MY0oPdaYjDvRma2C/BzYiSziIiIiIh0AqUG8gKY2Y7AOcDKuc3jgMPdvTiaXkREREREOkirg/60hPCu+ekTzWx3YlXFhYFJ7v5Su5RSRERERERKq6V7z9rAYtkLM1sI+Buwobs/Wu+CiYiIiIhIfZQdyJtpqEspRERERESk3bQ16BcRkQ5iZjeb2SvN7P+1mTWa2Zfa+DnDzOz5llPO875GMzuyhTQDUroNypdwTl6DzWxKW/OZn8xs77SatohIu6o16K80AKDcSGAREWmrK4HVzWzDKvt3AR5299fa+DmnAApM28feqG5FZD6odcrOe8xsdmHbAxW2Nbr7F9pQLhERadmNwBQiaHwsv8PMVgE2Bn5TNnMzW9Tdp9bhokEKsrrt6HKIyIKjlqD/pHYrhYiI1MzdPzWzG4GdzOwId883wOwCzAKuNrNlgdOAAcCyxPTK1wInufu07A1m1ggMAvoAexErrH/ezIYBG7j72ildq/JLFjazs4F9gEWA64Ffu/vH1Y7LzBqAI4D9iGmh3wYucPff11I/6cLnDWBP4P+AnYHpwJnufr6Z7Uz8b1sWuAv4ubtPTu8dANxDLDj5C+D7wEepHKcXPmdT4ExgPeAT4CbgSHefVCjHPqkc2wPjzewD4LspTXbX/CR3H2xm2wKHAuukensRONHd78h97t7A5cD6wOnApsB44BR3v6JQxm2BY1MZpwHPAIe5+1Npf++Ux3bAUsDzwCB3H9mauhaRzq/VQb+7K+gXEel8rgR2IwLw0bntu
wKj3H2imX0NmAQcDvwPWBMYTAS7+xTyOwR4GNiX6v8j+tWQ36+BJ4mLiFWJ4HgRIgCvZigRaJ8GPAJ8GzjLzKa6+5+beV81pxEXGz8FBgLnmdkXiTo7ClgSuAA4m7jQyLsEuIoI1LcETjOzSVk5zOwbwCjg3pR//3SMa5nZt919Vi6vM4BbiQuyHsCbwD+AT4Fs7MO49LgqcDNwLjAb2Aa4zcw2d/d7C2X8J3ApcD7wS2CYmT3m7i+mMv4sHcONxHkxnbj4WB54ysx6pWPoDxxHXGTtDtxqZuu7+3PNVa6IdA1lV+QVEZHOYSTwHhFIjgYws7WJaZbPBkhB25wBtWb2ENEi/TczO8jdP83lNwnY3t2rjteqMb9pwMAs+DWzqcBlZja40touadDxwcD+7n5J2nxXWgH+RDO7pHBHozX+4+6HpfxHAzsQFyMru/sHafs6xIVOMegf7e6/Tc/vNLP+wPG5chwHTAB+6O4zUl5vAXcSq9PfnMvraXf/ReF4PwKmuPvD+e3u/sdcmh7EXYe1UvnuLZTxj+5+UUr7b+LuxA7AqemuybnASHffLvee23LPdwPWBdZx9xdyx7oGcAKwEyLS5SnoFxHpwtx9ppldC+ySAu7pxAXAp8BwmNNd5hAiYFyVaGnPrEZ05cjc3lzAXyK/mwut3dcBfwE2Aiot6LhlerzezPL/o+4CjgZWBMY2V74KRmVP3H2Wmb0OzM4C/uRloLeZLeHu+RmAhhfyug7YA1iBaKnfBLgqC/jTZ4w0s8nAd5g76L+1tQU2sxWIOxRbEndQsimyn6iQfE4XHHf/xMzGpvIBWHp+RDMf9z3gOeDlQp2PIlr8RaQb0JSdIiJd35VEP/yt0+tdgJtyweuhwHlE946fEAH3QWlfPmAHeLcVn1dLfhPzL9z9I+AzIpCtpB8R4L4PzMj9ZIH7iq0oX9HkwuvpVbZBC+WnqX6y8vehcp29S/SNr/TeZqWW/ZuIi4bfAZsBGwK3VygfVD6WLF3f9Di+mY/sR/T1n1H4OZ5y9S0inZBa+kVEur5/A2OI1v6JROv7Ibn9PyUuAgZlG8zsq1Xyas00zLXkt3T+hZktSQSk71RJPymV4Ts0BeJ53ory1dPShdf902NW/kkV0mTpJhW2tXaK69WJIHygu9+YbTSzRVv5/rzsbsZyzaSZBDxLdG8SkW5KQb+ISBfn7o1mdhUR6H9KBHp35JIsyrwB9G5t+Mha8vuRmR2e6+KzIxH8PlYl/d3psa+731wlzfy0HXN38dmRaDXPBtw+CAxMsyfNBDCzrYDeaV9L8q3ymUVz+0h5rkwMvn25xvJ7Kus+wDVV0txFjD8Y7+7N3REQkS5MQb+ISPdwJTHd5j7Axfk+5kTXmEPM7GAiaNydaE0uq5b8PgeMMLOLiDsQZwHXZTPLFLn7y2Z2IfB3MzuHmL2nJzFD0GbuPrAN5S5j81SOUcBWRH/+g3KDiU8j7rTcYmYX0DR7z6PMPVi2mheBvczsR8Tdg/HEWIdxwJlmthCwBDG16Nu1Fj5dEB4JXGVm1wNXEIOrNwYec/db0rZfAfea2bmk8Q3E3YZe+Ts6ItJ1qU+/iEg34O7PE100GogLgLyT07aTgX8RfepLL9pVY34XEEHkP4iAfzgxHWdzfkP0J9+ZGPz6D+BnwH1tKHNZvyIuOIYTAf8J2Uw5AO7+BDEQdkliWtBziDJvUxjAXM3ZwENE4P0YsF9a62B7Iji/lqjn0yh5/O5+NTH2Ynni93UV0X1qXNo/DdgcuIWYjWgkcBGwAa27WyEiXUBDY2NruxiKiIgsGHKLc23o7o93cHFERNpMLf0iIiIiIt2cgn4RERERkW5O3XtERERERLo5tfSLiIiIiHRzCvpFRERERLo5Bf0iIiIiIt2cgn4RERERkW5OQb+IiIiISDenoF9EREREpJtT0C8iIiIi0s0p6BcRERER6eb+H+qiYLuIPmrvAAAAAElFTkSuQmCC" 
alt="feature_all">\n+ </div>\n+ <hr>\n+ \n+ </div>\n+ \n+ </div>\n+ <script>\n+ function openTab(evt, tabName) {{\n+ var i, tabcontent, tablinks;\n+ tabcontent = document.getElementsByClassName("tab-content");\n+ for (i = 0; i < tabcontent.length; i++) {{\n+ tabcontent[i].style.display = "none";\n+ }}\n+ tablinks = document.getElementsByClassName("tab");\n+ for (i = 0; i < tablinks.length; i++) {{\n+ tablinks[i].className =\n+ tablinks[i].className.replace(" active-tab", "");\n+ }}\n+ document.getElementById(tabName).style.display = "block";\n+ evt.currentTarget.className += " active-tab";\n+ }}\n+ document.addEventListener("DOMContentLoaded", function() {{\n+ document.querySelector(".tab").click();\n+ }});\n+ </script>\n+ </body>\n+ </html>\n+ \n+ \n\\ No newline at end of file\n'

diff -r 000000000000 -r 915447b14520 test-data/expected_best_model_classification.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_best_model_classification.csv Wed Dec 11 05:00:00 2024 +0000

@@ -0,0 +1,20 @@
+Parameter,Value
+boosting_type,gbdt
+class_weight,
+colsample_bytree,1.0
+importance_type,split
+learning_rate,0.1
+max_depth,-1
+min_child_samples,20
+min_child_weight,0.001
+min_split_gain,0.0
+n_estimators,100
+n_jobs,-1
+num_leaves,31
+objective,
+random_state,42
+reg_alpha,0.0
+reg_lambda,0.0
+subsample,1.0
+subsample_for_bin,200000
+subsample_freq,0

diff -r 000000000000 -r 915447b14520 test-data/expected_best_model_classification_customized.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_best_model_classification_customized.csv Wed Dec 11 05:00:00 2024 +0000

diff -r 000000000000 -r 915447b14520 test-data/expected_best_model_regression.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_best_model_regression.csv Wed Dec 11 05:00:00 2024 +0000

@@ -0,0 +1,22 @@
+Parameter,Value
+alpha,0.9
+ccp_alpha,0.0
+criterion,friedman_mse
+init,
+learning_rate,0.1
+loss,squared_error
+max_depth,3
+max_features,
+max_leaf_nodes,
+min_impurity_decrease,0.0
+min_samples_leaf,1
+min_samples_split,2
+min_weight_fraction_leaf,0.0
+n_estimators,100
+n_iter_no_change,
+random_state,42
+subsample,1.0
+tol,0.0001
+validation_fraction,0.1
+verbose,0
+warm_start,False

diff -r 000000000000 -r 915447b14520 test-data/expected_comparison_result_classification.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_comparison_result_classification.html Wed Dec 11 05:00:00 2024 +0000

[

b'@@ -0,0 +1,606 @@\n+\n+ \n+ <html>\n+ <head>\n+ <title>Model Training Report</title>\n+ <style>\n+ body {\n+ font-family: Arial, sans-serif;\n+ margin: 0;\n+ padding: 20px;\n+ background-color: #f4f4f4;\n+ }\n+ .container {\n+ max-width: 800px;\n+ margin: auto;\n+ background: white;\n+ padding: 20px;\n+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);\n+ }\n+ h1 {\n+ text-align: center;\n+ color: #333;\n+ }\n+ h2 {\n+ border-bottom: 2px solid #4CAF50;\n+ color: #4CAF50;\n+ padding-bottom: 5px;\n+ }\n+ table {\n+ width: 100%;\n+ border-collapse: collapse;\n+ margin: 20px 0;\n+ }\n+ table, th, td {\n+ border: 1px solid #ddd;\n+ }\n+ th, td {\n+ padding: 8px;\n+ text-align: left;\n+ }\n+ th {\n+ background-color: #4CAF50;\n+ color: white;\n+ }\n+ .plot {\n+ text-align: center;\n+ margin: 20px 0;\n+ }\n+ .plot img {\n+ max-width: 100%;\n+ height: auto;\n+ }\n+ .tabs {\n+ display: flex;\n+ margin-bottom: 20px;\n+ cursor: pointer;\n+ justify-content: space-around;\n+ }\n+ .tab {\n+ padding: 10px;\n+ background-color: #4CAF50;\n+ color: white;\n+ border-radius: 5px 5px 0 0;\n+ flex-grow: 1;\n+ text-align: center;\n+ margin: 0 5px;\n+ }\n+ .tab.active-tab {\n+ background-color: #333;\n+ }\n+ .tab-content {\n+ display: none;\n+ padding: 20px;\n+ border: 1px solid #ddd;\n+ border-top: none;\n+ background-color: white;\n+ }\n+ .tab-content.active-content {\n+ display: block;\n+ }\n+ </style>\n+ </head>\n+ <body>\n+ <div class="container">\n+ \n+ <h1>PyCaret Model Training Report</h1>\n+ <div class="tabs">\n+ <div class="tab" onclick="openTab(event, \'summary\')">\n+ Setup & Best Model</div>\n+ <div class="tab" onclick="openTab(event, \'plots\')">\n+ Best Model Plots</div>\n+ <div class="tab" onclick="openTab(event, \'feature\')">\n+ Feature Importance</div>\n+ <div class="tab" onclick="openTab(event, \'explainer\')">\n+ Explainer\n+ </div>\n+ </div>\n+ <div id="summary" class="tab-content">\n+ <h2>Setup Parameters</h2>\n+ <table>\n+ <tr><th>Parameter</th><th>Value</th></tr>\n+ 
<table border="1" class="dataframe table">\n+ <tbody>\n+ <tr>\n+ <td>target</td>\n+ <td>PCR</td>\n+ </tr>\n+ <tr>\n+ <td>session_id</td>\n+ <td>42</td>\n+ </tr>\n+ <tr>\n+ <td>index</td>\n+ <td>False</td>\n+ </tr>\n+ </tbody>\n+</table>\n+ </table>\n+ <h5>If you want to know all the experiment setup parameters,\n+ please check the PyCaret documentation for\n+ the classification/regression <code>exp</code> function.</h5>\n+ <h2>Best Model: LGBMClassifier</h2>\n+ <table>\n+ <tr><th>Parameter</th><th>Value</th></tr>\n+ <table border="1" class="dataframe table">\n+ <tbody>\n+ <tr>\n+ <td>boosting_type</td>\n+ <td>gbdt</td>\n+ </tr>\n+ <tr>\n+ <td>class_weight</td>\n+ <td>None</td>\n+ </tr>\n+ <tr>\n+ <td>colsample_bytree</td>\n+ <td>1.0</td>\n+ </tr>\n+ <tr>\n+ <td>importance'..b'11 \\u003cbr\\u003eprecision: 0.42 \\u003cbr\\u003erecall: 0.89","threshold: 0.14 \\u003cbr\\u003eprecision: 0.44 \\u003cbr\\u003erecall: 0.89","threshold: 0.14 \\u003cbr\\u003eprecision: 0.47 \\u003cbr\\u003erecall: 0.89","threshold: 0.16 \\u003cbr\\u003eprecision: 0.50 \\u003cbr\\u003erecall: 0.89","threshold: 0.24 \\u003cbr\\u003eprecision: 0.53 \\u003cbr\\u003erecall: 0.89","threshold: 0.27 \\u003cbr\\u003eprecision: 0.50 \\u003cbr\\u003erecall: 0.78","threshold: 0.28 \\u003cbr\\u003eprecision: 0.64 \\u003cbr\\u003erecall: 0.78","threshold: 0.28 \\u003cbr\\u003eprecision: 0.70 \\u003cbr\\u003erecall: 0.78","threshold: 0.36 \\u003cbr\\u003eprecision: 0.67 \\u003cbr\\u003erecall: 0.67","threshold: 0.41 \\u003cbr\\u003eprecision: 0.75 \\u003cbr\\u003erecall: 0.67","threshold: 0.54 \\u003cbr\\u003eprecision: 0.86 \\u003cbr\\u003erecall: 0.67","threshold: 0.60 \\u003cbr\\u003eprecision: 0.83 \\u003cbr\\u003erecall: 0.56","threshold: 0.61 \\u003cbr\\u003eprecision: 0.80 \\u003cbr\\u003erecall: 0.44","threshold: 0.69 \\u003cbr\\u003eprecision: 0.67 \\u003cbr\\u003erecall: 0.22","threshold: 0.82 \\u003cbr\\u003eprecision: 0.50 \\u003cbr\\u003erecall: 
0.11"],"x":[0.42857142857142855,0.4,0.42105263157894735,0.4444444444444444,0.47058823529411764,0.5,0.5333333333333333,0.5,0.6363636363636364,0.7,0.6666666666666666,0.75,0.8571428571428571,0.8333333333333334,0.8,0.6666666666666666,0.5,1.0],"y":[1.0,0.8888888888888888,0.8888888888888888,0.8888888888888888,0.8888888888888888,0.8888888888888888,0.8888888888888888,0.7777777777777778,0.7777777777777778,0.7777777777777778,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.5555555555555556,0.4444444444444444,0.2222222222222222,0.1111111111111111,0.0],"type":"scatter"}], {"hovermode":"closest","plot_bgcolor":"#fff","title":{"text":"PR AUC CURVE"},"xaxis":{"constrain":"domain","range":[0,1],"title":{"text":"Precision"}},"yaxis":{"constrain":"domain","range":[0,1],"scaleanchor":"x","scaleratio":1,"title":{"text":"Recall"}},"template":{"data":{"scatter":[{"type":"scatter"}]}},"annotations":[{"align":"right","showarrow":false,"text":"pr-auc-score: 0.68","x":0.15,"xanchor":"left","y":0.4,"yanchor":"top"},{"align":"right","showarrow":false,"text":"cutoff: 0.50","x":0.15,"xanchor":"left","y":0.35,"yanchor":"top"},{"align":"right","showarrow":false,"text":"precision: 0.86","x":0.15,"xanchor":"left","y":0.3,"yanchor":"top"},{"align":"right","showarrow":false,"text":"recall: 0.67","x":0.15,"xanchor":"left","y":0.25,"yanchor":"top"}],"shapes":[{"line":{"color":"lightslategray","width":1},"type":"line","x0":0,"x1":1,"xref":"x","y0":0.6666666666666666,"y1":0.6666666666666666,"yref":"y"},{"line":{"color":"lightslategray","width":1},"type":"line","x0":0.8571428571428571,"x1":0.8571428571428571,"xref":"x","y0":0,"y1":1,"yref":"y"}],"margin":{"t":60,"b":60,"l":40,"r":40}}, {"responsive": true} ) }; </script> </div><hr>\n+ \n+ </div>\n+ \n+ </div>\n+ <script>\n+ function openTab(evt, tabName) {{\n+ var i, tabcontent, tablinks;\n+ tabcontent = document.getElementsByClassName("tab-content");\n+ for (i = 0; i < tabcontent.length; i++) {{\n+ tabcontent[i].style.display = "none";\n+ }}\n+ 
tablinks = document.getElementsByClassName("tab");\n+ for (i = 0; i < tablinks.length; i++) {{\n+ tablinks[i].className =\n+ tablinks[i].className.replace(" active-tab", "");\n+ }}\n+ document.getElementById(tabName).style.display = "block";\n+ evt.currentTarget.className += " active-tab";\n+ }}\n+ document.addEventListener("DOMContentLoaded", function() {{\n+ document.querySelector(".tab").click();\n+ }});\n+ </script>\n+ </body>\n+ </html>\n+ \n+ \n\\ No newline at end of file\n'

diff -r 000000000000 -r 915447b14520 test-data/expected_comparison_result_classification_customized.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_comparison_result_classification_customized.html Wed Dec 11 05:00:00 2024 +0000

[

b'@@ -0,0 +1,620 @@\n+\n+ \n+ <html>\n+ <head>\n+ <title>Model Training Report</title>\n+ <style>\n+ body {\n+ font-family: Arial, sans-serif;\n+ margin: 0;\n+ padding: 20px;\n+ background-color: #f4f4f4;\n+ }\n+ .container {\n+ max-width: 800px;\n+ margin: auto;\n+ background: white;\n+ padding: 20px;\n+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);\n+ }\n+ h1 {\n+ text-align: center;\n+ color: #333;\n+ }\n+ h2 {\n+ border-bottom: 2px solid #4CAF50;\n+ color: #4CAF50;\n+ padding-bottom: 5px;\n+ }\n+ table {\n+ width: 100%;\n+ border-collapse: collapse;\n+ margin: 20px 0;\n+ }\n+ table, th, td {\n+ border: 1px solid #ddd;\n+ }\n+ th, td {\n+ padding: 8px;\n+ text-align: left;\n+ }\n+ th {\n+ background-color: #4CAF50;\n+ color: white;\n+ }\n+ .plot {\n+ text-align: center;\n+ margin: 20px 0;\n+ }\n+ .plot img {\n+ max-width: 100%;\n+ height: auto;\n+ }\n+ .tabs {\n+ display: flex;\n+ margin-bottom: 20px;\n+ cursor: pointer;\n+ justify-content: space-around;\n+ }\n+ .tab {\n+ padding: 10px;\n+ background-color: #4CAF50;\n+ color: white;\n+ border-radius: 5px 5px 0 0;\n+ flex-grow: 1;\n+ text-align: center;\n+ margin: 0 5px;\n+ }\n+ .tab.active-tab {\n+ background-color: #333;\n+ }\n+ .tab-content {\n+ display: none;\n+ padding: 20px;\n+ border: 1px solid #ddd;\n+ border-top: none;\n+ background-color: white;\n+ }\n+ .tab-content.active-content {\n+ display: block;\n+ }\n+ </style>\n+ </head>\n+ <body>\n+ <div class="container">\n+ \n+ <h1>PyCaret Model Training Report</h1>\n+ <div class="tabs">\n+ <div class="tab" onclick="openTab(event, \'summary\')">\n+ Setup & Best Model</div>\n+ <div class="tab" onclick="openTab(event, \'plots\')">\n+ Best Model Plots</div>\n+ <div class="tab" onclick="openTab(event, \'feature\')">\n+ Feature Importance</div>\n+ <div class="tab" onclick="openTab(event, \'explainer\')">\n+ Explainer\n+ </div>\n+ </div>\n+ <div id="summary" class="tab-content">\n+ <h2>Setup Parameters</h2>\n+ <table>\n+ <tr><th>Parameter</th><th>Value</th></tr>\n+ 
<table border="1" class="dataframe table">\n+ <tbody>\n+ <tr>\n+ <td>target</td>\n+ <td>PCR</td>\n+ </tr>\n+ <tr>\n+ <td>session_id</td>\n+ <td>42</td>\n+ </tr>\n+ <tr>\n+ <td>index</td>\n+ <td>False</td>\n+ </tr>\n+ <tr>\n+ <td>train_size</td>\n+ <td>0.8</td>\n+ </tr>\n+ <tr>\n+ <td>normalize</td>\n+ <td>True</td>\n+ </tr>\n+ <tr>\n+ <td>feature_selection</td>\n+ <td>True</td>\n+ </tr>\n+ <tr>\n+ <td>fold</td>\n+ <td>5</td>\n+ </tr>\n+ <tr>\n+ <td>remove_outliers</td>\n+ <td>True</td>\n+ </tr>\n+ <tr>\n+ <td>remove_multicollinearity</td>\n+ <td>True</td>\n+ </tr>\n+ </tbody>\n+</table>\n+ </table>\n+ <h5>If you want to know all the experiment setup parameters,\n+ please check the PyCaret documentation for\n+ the classification/regression <code>exp</code> function.</h5>\n+ <h2>Best Model: LG'..b' {"responsive": true} ) }; </script> </div><hr><div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: \'local\'};</script>\n+ <script charset="utf-8" src="https://cdn.plot.ly/plotly-2.35.2.min.js"></script> <div id="560e6bb1-0157-4544-9b69-5a8698e317fe" class="plotly-graph-div" style="height:350px; width:100%;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("560e6bb1-0157-4544-9b69-5a8698e317fe")) { Plotly.newPlot( "560e6bb1-0157-4544-9b69-5a8698e317fe", [{"hoverinfo":"text","mode":"lines","name":"PR AUC CURVE","text":["threshold: 0.16 \\u003cbr\\u003eprecision: 0.43 \\u003cbr\\u003erecall: 1.00","threshold: 0.26 \\u003cbr\\u003eprecision: 0.50 \\u003cbr\\u003erecall: 1.00","threshold: 0.33 \\u003cbr\\u003eprecision: 0.55 \\u003cbr\\u003erecall: 1.00","threshold: 0.41 \\u003cbr\\u003eprecision: 0.56 \\u003cbr\\u003erecall: 0.83","threshold: 0.49 \\u003cbr\\u003eprecision: 0.50 \\u003cbr\\u003erecall: 0.67","threshold: 0.60 \\u003cbr\\u003eprecision: 0.80 \\u003cbr\\u003erecall: 0.67","threshold: 0.64 \\u003cbr\\u003eprecision: 0.50 \\u003cbr\\u003erecall: 0.17","threshold: 0.75 
\\u003cbr\\u003eprecision: 1.00 \\u003cbr\\u003erecall: 0.17"],"x":[0.42857142857142855,0.5,0.5454545454545454,0.5555555555555556,0.5,0.8,0.5,1.0,1.0],"y":[1.0,1.0,1.0,0.8333333333333334,0.6666666666666666,0.6666666666666666,0.16666666666666666,0.16666666666666666,0.0],"type":"scatter"}], {"hovermode":"closest","plot_bgcolor":"#fff","title":{"text":"PR AUC CURVE"},"xaxis":{"constrain":"domain","range":[0,1],"title":{"text":"Precision"}},"yaxis":{"constrain":"domain","range":[0,1],"scaleanchor":"x","scaleratio":1,"title":{"text":"Recall"}},"template":{"data":{"scatter":[{"type":"scatter"}]}},"annotations":[{"align":"right","showarrow":false,"text":"pr-auc-score: 0.75","x":0.15,"xanchor":"left","y":0.4,"yanchor":"top"},{"align":"right","showarrow":false,"text":"cutoff: 0.50","x":0.15,"xanchor":"left","y":0.35,"yanchor":"top"},{"align":"right","showarrow":false,"text":"precision: 0.50","x":0.15,"xanchor":"left","y":0.3,"yanchor":"top"},{"align":"right","showarrow":false,"text":"recall: 0.67","x":0.15,"xanchor":"left","y":0.25,"yanchor":"top"}],"shapes":[{"line":{"color":"lightslategray","width":1},"type":"line","x0":0,"x1":1,"xref":"x","y0":0.6666666666666666,"y1":0.6666666666666666,"yref":"y"},{"line":{"color":"lightslategray","width":1},"type":"line","x0":0.5,"x1":0.5,"xref":"x","y0":0,"y1":1,"yref":"y"}],"margin":{"t":60,"b":60,"l":40,"r":40}}, {"responsive": true} ) }; </script> </div><hr>\n+ \n+ </div>\n+ \n+ </div>\n+ <script>\n+ function openTab(evt, tabName) {{\n+ var i, tabcontent, tablinks;\n+ tabcontent = document.getElementsByClassName("tab-content");\n+ for (i = 0; i < tabcontent.length; i++) {{\n+ tabcontent[i].style.display = "none";\n+ }}\n+ tablinks = document.getElementsByClassName("tab");\n+ for (i = 0; i < tablinks.length; i++) {{\n+ tablinks[i].className =\n+ tablinks[i].className.replace(" active-tab", "");\n+ }}\n+ document.getElementById(tabName).style.display = "block";\n+ evt.currentTarget.className += " active-tab";\n+ }}\n+ 
document.addEventListener("DOMContentLoaded", function() {{\n+ document.querySelector(".tab").click();\n+ }});\n+ </script>\n+ </body>\n+ </html>\n+ \n+ \n\\ No newline at end of file\n'

diff -r 000000000000 -r 915447b14520 test-data/expected_comparison_result_regression.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_comparison_result_regression.html Wed Dec 11 05:00:00 2024 +0000

[

b'@@ -0,0 +1,591 @@\n+\n+ \n+ <html>\n+ <head>\n+ <title>Model Training Report</title>\n+ <style>\n+ body {\n+ font-family: Arial, sans-serif;\n+ margin: 0;\n+ padding: 20px;\n+ background-color: #f4f4f4;\n+ }\n+ .container {\n+ max-width: 800px;\n+ margin: auto;\n+ background: white;\n+ padding: 20px;\n+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);\n+ }\n+ h1 {\n+ text-align: center;\n+ color: #333;\n+ }\n+ h2 {\n+ border-bottom: 2px solid #4CAF50;\n+ color: #4CAF50;\n+ padding-bottom: 5px;\n+ }\n+ table {\n+ width: 100%;\n+ border-collapse: collapse;\n+ margin: 20px 0;\n+ }\n+ table, th, td {\n+ border: 1px solid #ddd;\n+ }\n+ th, td {\n+ padding: 8px;\n+ text-align: left;\n+ }\n+ th {\n+ background-color: #4CAF50;\n+ color: white;\n+ }\n+ .plot {\n+ text-align: center;\n+ margin: 20px 0;\n+ }\n+ .plot img {\n+ max-width: 100%;\n+ height: auto;\n+ }\n+ .tabs {\n+ display: flex;\n+ margin-bottom: 20px;\n+ cursor: pointer;\n+ justify-content: space-around;\n+ }\n+ .tab {\n+ padding: 10px;\n+ background-color: #4CAF50;\n+ color: white;\n+ border-radius: 5px 5px 0 0;\n+ flex-grow: 1;\n+ text-align: center;\n+ margin: 0 5px;\n+ }\n+ .tab.active-tab {\n+ background-color: #333;\n+ }\n+ .tab-content {\n+ display: none;\n+ padding: 20px;\n+ border: 1px solid #ddd;\n+ border-top: none;\n+ background-color: white;\n+ }\n+ .tab-content.active-content {\n+ display: block;\n+ }\n+ </style>\n+ </head>\n+ <body>\n+ <div class="container">\n+ \n+ <h1>PyCaret Model Training Report</h1>\n+ <div class="tabs">\n+ <div class="tab" onclick="openTab(event, \'summary\')">\n+ Setup & Best Model</div>\n+ <div class="tab" onclick="openTab(event, \'plots\')">\n+ Best Model Plots</div>\n+ <div class="tab" onclick="openTab(event, \'feature\')">\n+ Feature Importance</div>\n+ <div class="tab" onclick="openTab(event, \'explainer\')">\n+ Explainer\n+ </div>\n+ </div>\n+ <div id="summary" class="tab-content">\n+ <h2>Setup Parameters</h2>\n+ <table>\n+ <tr><th>Parameter</th><th>Value</th></tr>\n+ 
<table border="1" class="dataframe table">\n+ <tbody>\n+ <tr>\n+ <td>target</td>\n+ <td>MPG</td>\n+ </tr>\n+ <tr>\n+ <td>session_id</td>\n+ <td>42</td>\n+ </tr>\n+ <tr>\n+ <td>index</td>\n+ <td>False</td>\n+ </tr>\n+ </tbody>\n+</table>\n+ </table>\n+ <h5>If you want to know all the experiment setup parameters,\n+ please check the PyCaret documentation for\n+ the classification/regression <code>exp</code> function.</h5>\n+ <h2>Best Model: GradientBoostingRegressor</h2>\n+ <table>\n+ <tr><th>Parameter</th><th>Value</th></tr>\n+ <table border="1" class="dataframe table">\n+ <tbody>\n+ <tr>\n+ <td>alpha</td>\n+ <td>0.9</td>\n+ </tr>\n+ <tr>\n+ <td>ccp_alpha</td>\n+ <td>0.0</td>\n+ </tr>\n+ <tr>\n+ <td>criterion</td>\n+ <td>friedman_mse</td>\n+ </tr>\n+ <tr>\n+ <td>init</td>\n'..b'4518418529,1.701321418081566,-2.2588239552333427,1.492362176474547,-1.0724206234329046,1.9737832863179285,0.5199853248685802,-5.0056990737264435,1.236686729544326,1.4451807897120155,0.7262636050746671,4.4444356312918885,0.8494365567123658,-4.095854920030195,7.9407153645929895,-0.25720632541159105,0.21131763978828744,4.349059632965545,0.9196749429618585,2.0814149515752547,0.6019792135664233,-3.601156431027068,-0.6013492258084252,0.9345132637457922,-0.5174749410137434,1.6087394665852752,-0.7520637043872753,-6.02270933575047,0.05617917334626554,-0.9097071118381557,-0.5165771240391521,-0.9696269499004764,0.23406740116769242,0.2489212028300134,-0.2002555298390689,0.2343034819320735,3.248361149407529,0.1118685816874887,1.6071502052653397,-0.9039619757904092,-5.270275566564166,-5.039669374935023,-0.25161424006218525,-0.08145203225001474,-10.552159851916691,-2.0993719260999057,3.0288451176271707,-2.3759156847593985,2.059684218605213,-2.4196706272286157,0.7973082973331209,3.5827683604394345,0.3986507741915748,9.230263722015657,-1.341933793969119,-0.5742788589215309,1.7900192397035202,1.0494400196372808,0.8916022728823929,-0.7953197243476637,-1.2784370175099102,-2.4854357223045085,-1.5573562309395044,0.
5182273715022063,4.742196249496811,-0.027352566725095784,-4.623515440043654,-1.2018440904121626,-2.8615905867127687,-2.7107153703433227,0.44363504247699836,0.9589812315648913,-4.733642743707957],"type":"scattergl"},{"hoverinfo":"none","mode":"lines","name":"Origin","x":[3.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,3.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,3.0,3.0,2.0,2.0,2.0,2.0,1.0,3.0,3.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,3.0,1.0,2.0,1.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,3.0,3.0,3.0,2.0,1.0,3.0,1.0,3.0,1.0,1.0,3.0,2.0,2.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,3.0,3.0,2.0,1.0,3.0,1.0,1.0,2.0,1.0,2.0,2.0],"y":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],"type":"scattergl"}], {"hovermode":"closest","plot_bgcolor":"#fff","title":{"text":"Residuals vs Origin"},"xaxis":{"title":{"text":"Origin value"}},"yaxis":{"title":{"text":"residuals (y-preds)"},"range":[-11.116830704987294,9.231563684667769]},"template":{"data":{"scatter":[{"type":"scatter"}]}},"margin":{"t":60,"b":60,"l":40,"r":40}}, {"responsive": true} ) }; </script> </div><hr>\n+ \n+ </div>\n+ \n+ </div>\n+ <script>\n+ function openTab(evt, tabName) {{\n+ var i, tabcontent, tablinks;\n+ tabcontent = document.getElementsByClassName("tab-content");\n+ for (i = 0; i < tabcontent.length; i++) {{\n+ tabcontent[i].style.display = "none";\n+ }}\n+ tablinks = document.getElementsByClassName("tab");\n+ for (i = 0; i < tablinks.length; i++) {{\n+ 
tablinks[i].className =\n+ tablinks[i].className.replace(" active-tab", "");\n+ }}\n+ document.getElementById(tabName).style.display = "block";\n+ evt.currentTarget.className += " active-tab";\n+ }}\n+ document.addEventListener("DOMContentLoaded", function() {{\n+ document.querySelector(".tab").click();\n+ }});\n+ </script>\n+ </body>\n+ </html>\n+ \n+ \n\\ No newline at end of file\n'

diff -r 000000000000 -r 915447b14520 test-data/expected_model_classification.h5

Binary file test-data/expected_model_classification.h5 has changed

diff -r 000000000000 -r 915447b14520 test-data/expected_model_classification_customized.h5

Binary file test-data/expected_model_classification_customized.h5 has changed

diff -r 000000000000 -r 915447b14520 test-data/expected_model_regression.h5

Binary file test-data/expected_model_regression.h5 has changed

diff -r 000000000000 -r 915447b14520 test-data/pcr.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pcr.tsv Wed Dec 11 05:00:00 2024 +0000

b'@@ -0,0 +1,70 @@\n+SCGB2A2\tFDCSP\tMUCL1\tPIP\tTFF1\tSCGB1D1\tSCGB1D2\tCALML5\tAGR2\tCPB1\tPCR\n+0.1446255786628313\t0.1699650673189346\t0.9375600251501058\t0.6976422301857574\t0.0957045465168299\t0.1228059681653541\t0.1203946505834535\t0.6617311325227386\t0.5265003537481928\t0.184448963872354\t0\n+0.3704854668147701\t0.3635509393089497\t0.2775010191320449\t0.3686096626765244\t0.2591694877907093\t0.4492672714584788\t0.4011669070407762\t0.5399668857265811\t0.138846165677197\t0.0619176638311385\t0\n+0.0418563439424193\t0.0619893695619427\t0.1075443408806682\t0.5687699992462811\t0.533482902986719\t0.0156581264745954\t0.0292513439097941\t0.237518325905719\t0.5496170291303947\t0.0824645446376087\t0\n+0.1909378671820324\t0.0613509694356973\t0.3629699235132763\t0.3137868894020268\t0.5779331343522767\t0.2338351111554182\t0.2060115379572571\t0.3704859920788323\t0.6821187978713588\t0.1031398012202455\t0\n+0.3100284435655261\t0.1002646849961624\t0.4381507762676965\t0.1638859006598466\t0.6850690758064408\t0.3223872661416277\t0.2810213714435557\t0.7561870445875668\t0.6041327017133716\t0.1048289561698074\t0\n+0.6841865332879608\t0.409759631592916\t0.8638163213133329\t0.7138334829351185\t0.1695946261760247\t0.5548865184594425\t0.5307263668545956\t0.6849694750585335\t0.3038312467316744\t0.1088018486111768\t1\n+0.7546533801614157\t0.1863769716880304\t0.4316006937007274\t0.4769875910458192\t0.5771919633020173\t0.7850330641677468\t0.748793759014029\t0.1828214236220012\t0.6132763234796518\t0.7975784274643083\t0\n+0.6353830250341901\t0.3786932164606809\t0.5679018316739328\t0.1040817305386349\t0.1566875663548406\t0.5730967334596087\t0.5204929854464402\t0.5372681453818717\t0.0586914392937339\t0.0612420018513138\t1\n+0.0166680578117074\t0.3562631356205751\t0.1643117231277334\t0.0396456150688968\t0.0395224454638328\t0.0423354268434519\t0.0142454438180149\t0.5395511338356394\t0.0002845365898673\t0.0720863766275008\t1\n+0.5102184953803285\t0.4378492371477142\t0.9462451029841566\t0.862117401
9994107\t0.2530665117552399\t0.5187252849499885\t0.474170709322145\t0.7239699199859956\t0.632793995508936\t0.285088815767248\t0\n+0.0802840504150962\t0.423273629770965\t0.2952995557275221\t0.1087890477789274\t0.0522291887849468\t0.0730801182999368\t0.0557362003408942\t0.7561870445875668\t0.1211972069273124\t0.079768653338108\t1\n+0.3646619196517429\t0.4965031453759028\t0.5356212559852415\t0.5401492363456967\t0.3042607319564912\t0.4574153457614728\t0.4296053494165464\t0.7187110232602242\t0.2191393152665416\t0.1756923846138254\t1\n+0.2492696488535895\t0.1788094196297279\t0.3172368048310312\t0.4172587928163735\t0.2613195515581284\t0.2899212441431563\t0.2440540186180673\t0.2416685509223127\t0.3306930388507797\t0.0510057228569691\t0\n+0.1849281206058544\t0.404946525023133\t0.3782810869820563\t0.3862124253989571\t0.1854263068982325\t0.2116173196424418\t0.1936475678510554\t0.6157504321631497\t0.213740810237165\t0.1145247055802923\t1\n+0.1111803116592936\t0.0732366886400642\t0.658262569871002\t0.7094619132126927\t0.2154003325253901\t0.1214036486890638\t0.0958502687819588\t0.1784013245709367\t0.6119766833799871\t0.8450707080261888\t1\n+0.0712373272037342\t0.6312270911190652\t0.4727114439891937\t0.4745003186175425\t0.1220261346260424\t0.0398497989565679\t0.0539202832044053\t0.1447910664400697\t0.1038404749453997\t0.1136531016263183\t1\n+0.4914637200074481\t0.3637661303627403\t0.8727155896110713\t0.1880049060249549\t0.5807308881365894\t0.3977004619014389\t0.3426642192211879\t0.1356664065178225\t0.7099880033221571\t0.2581434160118376\t0\n+0.1879650972410383\t0.6843649353350882\t0.3442040751463059\t0.3207073994641743\t0.1664095938249101\t0.2249227395075267\t0.1813425986626459\t0.6826135476765304\t0.0589759758836014\t0.0306615406444463\t1\n+0.0640140741073664\t0.7704054916756926\t0.2979182068803504\t0.3257436122321728\t0.1189880010416458\t0.1843019971421925\t0.1558607578340107\t0.6701045214841611\t0.3286013104063491\t0.0820591474497138\t0\n+0.0302670356411359\t0.7082081040950856\
t0.274744180583289\t0.0606127049601557\t0.0570301075699605\t0.0660818130462233\t0.0421200996459945\t0.5280705465313893\t0.1142837368113445\t0.0651473280947008\t1\n+0.0790063372007165\t0.0583813328933871\t0.0643607796532877\t0.4587818531'..b'593879\t0.3750134494408619\t0.0521519232230828\t0.0816414627628595\t0.1074364162042694\t0.0879805934935035\t0.0779598793759014\t0.3037614605290989\t0.083392291365468\t0.0954642811294366\t1\n+0.1357586341952011\t0.8754761102065116\t0.1697563065272815\t0.135731073090179\t0.1530885465702476\t0.1466786295816303\t0.1258555133079848\t0.562096556553198\t0.1268187271217201\t0.1500915521982663\t0\n+0.2510289122743937\t0.3406259190451255\t0.581907124251197\t0.7472232309874403\t0.6858369557233763\t0.1766656697570863\t0.127546872951357\t0.1621505313600921\t0.6288335537851057\t0.2456842091038695\t1\n+0.1956699005438307\t0.6519571626342255\t0.389108069452985\t0.1192040728229514\t0.0828842905123428\t0.2295018775130429\t0.1958437131244264\t0.5566990758637792\t0.0865606447445322\t0.1591859624467071\t0\n+0.3356469145473107\t0.3457905043360997\t0.2730859318321574\t0.6796352000438528\t0.4441484211721186\t0.2817731698401622\t0.2434836764127442\t0.5422498741803488\t0.5881448214340645\t0.6631892596771687\t1\n+0.1143970670382094\t0.5981809182919569\t0.180521104670043\t0.2306037288530453\t0.0763072320933741\t0.193726115707972\t0.1536580569031074\t0.8881919169079727\t0.450452182472546\t0.078484895576441\t0\n+0.5797222418409345\t0.2175079441364024\t0.2170785802626942\t0.4617350609484524\t0.3569039081749164\t0.480271159405842\t0.4535203880949259\t0.2057096593022661\t0.6266726137377342\t1.0000000000000002\t0\n+0.133877378055436\t0.8522713415727596\t0.1536851123808997\t0.2878452546542144\t0.0997108765182321\t0.1765327484797129\t0.1496459944932476\t0.431338939905617\t0.2374342489772063\t0.138774214036202\t0\n+0.7981790981527734\t0.2165897956402292\t0.7117894576835645\t0.6045784998252742\t0.6593417599807698\t0.8219519489582297\t0.7853284384423758\t0.58173171603
41645\t0.4874650096896245\t0.8523475875489013\t0\n+0.2857518924923112\t0.924087769257806\t0.2979873005783142\t0.2512282192362771\t0.2142184651749765\t0.3015717941049414\t0.2615051789694506\t0.6093245125856119\t0.1986834415085053\t0.1229502104687067\t1\n+0.0858058261154308\t0.8986163215241265\t0.4177335885193911\t0.2842205518592875\t0.1110287587721933\t0.2480909181537235\t0.2154910187491805\t0.5918994026301777\t0.0901596480974499\t0.0779511226123795\t1\n+0.4424162263157557\t0.4638156243051122\t0.9354181205132276\t0.6676031053219408\t0.4695285217310017\t0.484285381982521\t0.4451094794807919\t0.540032530761993\t0.6746285643975516\t0.1133828368343885\t0\n+0.1224678484978843\t0.6581116267726363\t0.6069466803932813\t0.0373639023454362\t0.0411183002477246\t0.1476356627787193\t0.1003671168218172\t0.7746478873239436\t0.013611615245009\t0.0415734816186158\t0\n+0.8225776419449493\t0.6945506452145096\t0.9266570396114168\t0.366999444988797\t0.5741872158009655\t0.5425381317914466\t0.488796381277042\t0.5683838921670884\t0.7889199913870006\t0.0824645446376087\t1\n+0.1426223298041053\t0.8913285178357517\t0.0756023243119994\t0.1456733108131257\t0.0894279628479664\t0.1422191207257502\t0.1296184607316113\t0.6501192551476648\t0.1399920022147712\t0.136152645554482\t1\n+0.7436098287607467\t0.0508783381512219\t0.9367999944725042\t0.8668932391413087\t0.6116530785307452\t0.6847572525171968\t0.6387570473318473\t0.0916842327918833\t0.5387431172905965\t0.5302865482456436\t0\n+0.5959857974792453\t0.3464289044623451\t0.422286863215206\t0.1385540930363224\t0.4875369750873046\t0.4499052935898715\t0.4171758227350202\t0.0580812685538398\t0.5668276477283215\t0.0697958825158949\t0\n+0.5064559831007982\t0.1684730760126532\t0.8784434571722713\t0.6313218174218702\t0.1444549054172259\t0.304369786993653\t0.2628228661334732\t0.7438093084660212\t0.6597249992309823\t0.4005459348796984\t1\n+0.6454570553525911\t0.1390421128892268\t0.3442040751463059\t0.3152600672865434\t0.3017567757056149\t0.6874954308310904\t0.
6415169791530091\t0.1059292054762546\t0.5925359131317481\t0.203847219313122\t0\n+0.0556351005155797\t0.9816872413224208\t0.0713945181060035\t0.0874862103698019\t0.0640745711557594\t0.0695842887050144\t0.030064245443818\t0.4076192004434686\t0.1690147343812482\t0.0523502901968203\t1\n+0.3732206719872614\t0.2336401001355704\t0.362348080231602\t0.2704480516365979\t0.6884010069242736\t0.3512311833316719\t0.3141667759276256\t0.0\t0.7696176443446432\t0.3280676743038993\t0\n'

diff -r 000000000000 -r 915447b14520 test-data/predictions_classification.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predictions_classification.csv Wed Dec 11 05:00:00 2024 +0000

b'@@ -0,0 +1,70 @@\n+SCGB2A2,FDCSP,MUCL1,PIP,TFF1,SCGB1D1,SCGB1D2,CALML5,AGR2,CPB1,PCR,prediction_label,prediction_score\n+0.14462557,0.16996507,0.93756,0.6976422,0.09570455,0.12280597,0.12039465,0.6617311,0.52650034,0.18444896,0,0,0.7168\n+0.37048545,0.36355093,0.27750102,0.36860967,0.2591695,0.44926727,0.40116692,0.5399669,0.13884616,0.061917663,0,1,0.8633\n+0.041856345,0.06198937,0.10754434,0.56877,0.5334829,0.015658127,0.029251345,0.23751833,0.54961705,0.082464546,0,0,0.6666\n+0.19093786,0.061350968,0.36296993,0.3137869,0.57793313,0.23383512,0.20601153,0.370486,0.6821188,0.1031398,0,0,0.859\n+0.31002843,0.10026468,0.43815076,0.1638859,0.6850691,0.32238728,0.28102136,0.756187,0.6041327,0.104828954,0,0,0.8391\n+0.6841865,0.40975964,0.8638163,0.7138335,0.16959463,0.5548865,0.5307264,0.6849695,0.30383125,0.10880185,1,1,0.8534\n+0.7546534,0.18637697,0.4316007,0.4769876,0.57719195,0.78503305,0.7487938,0.18282142,0.6132763,0.79757845,0,0,0.7254\n+0.635383,0.37869322,0.56790185,0.10408173,0.15668757,0.57309675,0.520493,0.53726816,0.05869144,0.061242003,1,1,0.7164\n+0.016668057,0.35626313,0.16431172,0.039645616,0.039522447,0.04233543,0.014245444,0.53955114,0.0002845366,0.07208638,1,1,0.7462\n+0.5102185,0.43784922,0.9462451,0.8621174,0.2530665,0.5187253,0.4741707,0.72396994,0.632794,0.2850888,0,0,0.7465\n+0.08028405,0.42327362,0.29529956,0.10878905,0.05222919,0.073080115,0.0557362,0.756187,0.12119721,0.07976865,1,1,0.6899\n+0.36466193,0.49650314,0.5356213,0.5401492,0.30426073,0.45741534,0.42960533,0.718711,0.21913932,0.17569238,1,1,0.6959\n+0.24926965,0.17880942,0.3172368,0.4172588,0.26131955,0.28992125,0.24405402,0.24166855,0.33069304,0.05100572,0,0,0.5877\n+0.18492812,0.40494654,0.3782811,0.38621244,0.18542631,0.21161732,0.19364756,0.61575043,0.21374081,0.11452471,1,1,0.865\n+0.11118031,0.07323669,0.65826255,0.7094619,0.21540034,0.12140365,0.09585027,0.17840132,0.6119767,0.8450707,1,0,0.7168\n+0.071237326,0.6312271,0.47271144,0.47450033,0.12202614,0.0398498,0.053920284,
0.14479107,0.10384048,0.1136531,1,1,0.8216\n+0.49146372,0.36376613,0.8727156,0.18800491,0.5807309,0.39770046,0.3426642,0.1356664,0.709988,0.25814342,0,0,0.9118\n+0.1879651,0.6843649,0.34420407,0.3207074,0.1664096,0.22492275,0.1813426,0.68261355,0.058975976,0.03066154,1,1,0.8174\n+0.06401408,0.7704055,0.2979182,0.32574362,0.118988,0.184302,0.15586075,0.6701045,0.3286013,0.082059145,0,0,0.6945\n+0.030267036,0.7082081,0.27474418,0.060612705,0.057030108,0.066081814,0.0421201,0.52807057,0.11428374,0.065147325,1,1,0.6086\n+0.07900634,0.058381334,0.06436078,0.45878184,0.24089395,0.26133654,0.22898912,0.14880271,0.46996987,0.25565022,0,0,0.7225\n+0.26116714,0.99817085,0.033247888,0.15212789,0.07527894,0.25669092,0.21542546,0.23945121,0.06935003,0.09445079,0,1,0.6018\n+0.052180782,0.06191764,0.2881967,0.59275883,0.7092673,0.30055496,0.26727417,0.17701548,0.8361608,0.21651588,0,0,0.7553\n+0.15154064,0.36086106,0.10230013,0.063168496,0.037465863,0.116312765,0.08260129,0.506911,0.007920884,0.053025953,1,1,0.7462\n+0.34851393,0.16911148,0.24853003,0.47067004,0.392834,0.42405877,0.38064113,0.26877266,0.35848534,0.5235502,1,0,0.6467\n+0.55689675,0.16231862,0.16114032,0.13956134,0.536728,0.4711727,0.44586337,0.14409815,0.5779861,0.1824963,0,0,0.8867\n+0.08489409,0.2717074,0.16382806,0.3765374,0.59231585,0.10141229,0.0962895,0.21850315,0.55069363,0.8636582,1,0,0.75\n+0.18098582,0.46112573,0.46657592,0.28838655,0.14581038,0.16100089,0.13144094,0.48029554,0.36770585,0.14153767,1,0,0.9076\n+0.69255906,0.11413734,0.27219462,0.6383794,0.76409394,0.60588175,0.5770552,0.24367437,0.73750347,0.35291854,0,0,0.7254\n+0.45753047,0.18616179,0.09471364,0.29994586,0.44350073,0.5126175,0.46067262,0.117752604,0.8363761,0.0629244,0,0,0.8391\n+0.424509,0.0815861,0.7912541,0.2688995,0.5514647,0.55692685,0.5007211,0.08518537,0.456935,0.12065972,0,0,0.8391\n+0.29825294,0.79552543,0.13871942,0.13996561,0.06304628,0.30927458,0.26702505,0.64154166,0.11010797,0.07855246,1,1,0.6384\n+0.23360963,0.40621614,0.9
879086,0.96619916,0.16856633,0.1614'..b'.2509483,0.38191622,0.6583068,0.18207556,0.1503999,0.05331106,0.7082654,0.09101842,0,0,0.6729\n+0.416252,0.27503568,0.6070849,0.5545247,0.041939598,0.24351178,0.21636948,0.7969891,0.032191087,0.063667625,1,1,0.8943\n+0.40533045,0.93095237,0.21694039,0.08319001,0.12644646,0.29271924,0.25673267,0.18061137,0.2547833,0.06488382,1,1,0.5404\n+0.4072117,0.14703287,0.2906081,0.25318104,0.2807102,0.47677532,0.45251736,0.45518997,0.5452182,0.20095539,0,0,0.8867\n+0.78318685,0.4952335,0.8876882,0.8637345,0.75194806,0.84525305,0.8152681,0.4242128,0.8149128,0.21475916,0,0,0.8332\n+0.16768862,0.19161329,0.33034387,0.24578774,0.61686796,0.152474,0.12591451,0.15786172,0.7106417,0.04191131,0,0,0.8555\n+0.13539907,0.44768345,0.085123435,0.10919332,0.049785327,0.1853853,0.14041562,0.6329713,0.03795103,0.074106604,1,1,0.6838\n+0.13460934,0.8714449,0.32027003,0.28300774,0.1089655,0.17901838,0.1542284,0.16401777,0.1901858,0.055384014,1,1,0.6086\n+0.47749877,0.8638056,0.09643407,0.53436613,0.0,0.24006912,0.21392421,0.6193463,0.050839767,0.09216029,0,1,0.8174\n+0.020372784,0.9942114,0.33675578,0.047038913,0.04203308,0.013112684,0.0,0.2207132,0.1121997,0.033627696,1,1,0.6086\n+0.29710364,0.5374755,0.7081344,0.62460685,0.17053612,0.22262985,0.19992132,0.16263193,0.14914331,0.15460497,1,1,0.749\n+0.13302985,0.1302695,0.34013444,0.34240764,0.759934,0.11197953,0.11103973,0.3139948,0.4837199,0.91532606,0,0,0.75\n+0.076277554,0.48963854,0.11030118,0.09165222,0.082557105,0.06041272,0.06659892,0.61982775,0.096134916,0.1151328,0,1,0.6086\n+0.07051179,0.37501344,0.052151922,0.081641465,0.10743642,0.08798059,0.07795988,0.30376145,0.08339229,0.09546428,1,1,0.6899\n+0.13575864,0.8754761,0.16975631,0.13573107,0.15308854,0.14667863,0.12585552,0.56209654,0.12681873,0.15009156,0,1,0.509\n+0.25102893,0.3406259,0.58190715,0.74722326,0.685837,0.17666566,0.12754688,0.16215053,0.62883353,0.2456842,1,0,0.75\n+0.1956699,0.65195715,0.38910806,0.119204074,0.08288429,0.22950187
,0.19584371,0.5566991,0.086560644,0.15918596,0,1,0.5019\n+0.33564693,0.3457905,0.27308592,0.6796352,0.44414842,0.28177318,0.24348368,0.54224986,0.58814484,0.66318923,1,0,0.7553\n+0.114397064,0.5981809,0.1805211,0.23060372,0.07630723,0.19372612,0.15365806,0.88819194,0.45045218,0.07848489,0,0,0.9067\n+0.5797222,0.21750794,0.21707858,0.46173507,0.3569039,0.48027116,0.4535204,0.20570967,0.6266726,1.0,0,0,0.7254\n+0.13387738,0.8522713,0.15368511,0.28784525,0.099710874,0.17653275,0.149646,0.43133894,0.23743425,0.13877422,0,0,0.5535\n+0.7981791,0.2165898,0.7117894,0.6045785,0.65934175,0.8219519,0.78532845,0.58173174,0.48746502,0.8523476,0,0,0.7254\n+0.28575188,0.92408776,0.2979873,0.2512282,0.21421847,0.3015718,0.2615052,0.6093245,0.19868344,0.12295021,1,1,0.6384\n+0.085805826,0.8986163,0.41773358,0.28422055,0.11102876,0.24809092,0.21549101,0.5918994,0.09015965,0.077951126,1,1,0.6018\n+0.44241622,0.46381563,0.9354181,0.66760314,0.46952853,0.48428538,0.4451095,0.5400325,0.67462856,0.11338284,0,0,0.769\n+0.122467846,0.65811163,0.6069467,0.0373639,0.0411183,0.14763567,0.100367114,0.7746479,0.013611616,0.04157348,0,1,0.6086\n+0.82257766,0.69455063,0.926657,0.36699945,0.5741872,0.5425381,0.48879638,0.5683839,0.78892,0.082464546,1,0,0.769\n+0.14262234,0.8913285,0.07560232,0.1456733,0.08942796,0.14221913,0.12961847,0.65011925,0.139992,0.13615264,1,1,0.6086\n+0.74360985,0.05087834,0.9368,0.86689323,0.6116531,0.68475723,0.63875705,0.09168423,0.53874314,0.53028655,0,0,0.7254\n+0.5959858,0.3464289,0.42228687,0.1385541,0.48753697,0.4499053,0.41717583,0.05808127,0.56682765,0.069795884,0,0,0.8391\n+0.50645596,0.16847308,0.8784435,0.6313218,0.14445491,0.30436978,0.26282287,0.7438093,0.659725,0.40054592,1,0,0.6903\n+0.645457,0.13904211,0.34420407,0.31526005,0.30175677,0.6874954,0.641517,0.1059292,0.5925359,0.20384721,0,0,0.8867\n+0.055635102,0.98168725,0.07139452,0.08748621,0.06407457,0.06958429,0.030064246,0.4076192,0.16901474,0.05235029,1,1,0.6086\n+0.37322068,0.2336401,0.36234808,0.270
44806,0.688401,0.3512312,0.31416678,0.0,0.7696176,0.32806766,0,0,0.8867\n'

diff -r 000000000000 -r 915447b14520 test-data/predictions_regression.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predictions_regression.csv Wed Dec 11 05:00:00 2024 +0000

b'@@ -0,0 +1,399 @@\n+Cylinders,Displacement,Horsepower,Weight,Acceleration,ModelYear,Origin,MPG,prediction_label\n+8,307.0,130.0,3504.0,12.0,70,1,18.0,15.496568242719972\n+8,350.0,165.0,3693.0,11.5,70,1,15.0,15.295167199600813\n+8,318.0,150.0,3436.0,11.0,70,1,18.0,15.751078797169987\n+8,304.0,150.0,3433.0,12.0,70,1,16.0,15.751078797169987\n+8,302.0,140.0,3449.0,10.5,70,1,17.0,15.496568242719972\n+8,429.0,198.0,4341.0,10.0,70,1,15.0,13.47524684916836\n+8,454.0,220.0,4354.0,9.0,70,1,14.0,13.584079309576778\n+8,440.0,215.0,4312.0,8.5,70,1,14.0,13.192384042871243\n+8,455.0,225.0,4425.0,10.0,70,1,14.0,12.69959324150899\n+8,390.0,190.0,3850.0,8.5,70,1,15.0,13.526017793240083\n+8,383.0,170.0,3563.0,10.0,70,1,15.0,15.382442402453687\n+8,340.0,160.0,3609.0,8.0,70,1,14.0,14.948584364357012\n+8,400.0,150.0,3761.0,9.5,70,1,15.0,13.984349684908732\n+8,455.0,225.0,3086.0,10.0,70,1,14.0,15.718811053746911\n+4,113.0,95.00,2372.0,15.0,70,3,24.0,24.04107163431282\n+6,198.0,95.00,2833.0,15.5,70,1,22.0,18.69046864753495\n+6,199.0,97.00,2774.0,15.5,70,1,18.0,18.57427885892153\n+6,200.0,85.00,2587.0,16.0,70,1,21.0,20.821149210852692\n+4,97.0,88.00,2130.0,14.5,70,3,27.0,28.260061496908907\n+4,97.0,46.00,1835.0,20.5,70,2,26.0,26.795319724347664\n+4,110.0,87.00,2672.0,17.5,70,2,25.0,22.758221490905793\n+4,107.0,90.00,2430.0,14.5,70,2,24.0,26.08420833813644\n+4,104.0,95.00,2375.0,17.5,70,2,25.0,23.298678581918434\n+4,121.0,113.0,2234.0,12.5,70,2,26.0,25.61527601855662\n+6,199.0,90.00,2648.0,15.0,70,1,21.0,21.62386372514712\n+8,360.0,215.0,4615.0,14.0,70,1,10.0,12.499203604751683\n+8,307.0,200.0,4376.0,15.0,70,1,10.0,12.127714793001413\n+8,318.0,210.0,4382.0,13.5,70,1,11.0,12.328052408601287\n+8,304.0,193.0,4732.0,18.5,70,1,9.0,10.069501289521417\n+4,97.0,88.00,2130.0,14.5,71,3,27.0,28.260061496908907\n+4,140.0,90.00,2264.0,15.5,71,1,28.0,24.1187999092876\n+4,113.0,95.00,2228.0,14.0,71,3,25.0,24.08032505703814\n+4,98.0,?,2046.0,19.0,71,1,25.0,26.726784109653302\n+6,232.0,100.0,2634.0,13.0,71
,1,19.0,20.36926456258466\n+6,225.0,105.0,3439.0,15.5,71,1,16.0,17.213306929165558\n+6,250.0,100.0,3329.0,15.5,71,1,17.0,16.7726121207363\n+6,250.0,88.00,3302.0,15.5,71,1,19.0,18.032001571463223\n+6,232.0,100.0,3288.0,15.5,71,1,18.0,17.508638427058603\n+8,350.0,165.0,4209.0,12.0,71,1,14.0,13.872600628483884\n+8,400.0,175.0,4464.0,11.5,71,1,14.0,12.763313270455674\n+8,351.0,153.0,4154.0,13.5,71,1,14.0,13.800661505379043\n+8,318.0,150.0,4096.0,13.0,71,1,14.0,13.986726468750168\n+8,383.0,180.0,4955.0,11.5,71,1,12.0,12.47748465212163\n+8,400.0,170.0,4746.0,12.0,71,1,13.0,12.598632864333123\n+8,400.0,175.0,5140.0,12.0,71,1,13.0,12.47748465212163\n+6,258.0,110.0,2962.0,13.5,71,1,18.0,18.6134791783886\n+4,140.0,72.00,2408.0,19.0,71,1,22.0,20.507637823525453\n+6,250.0,100.0,3282.0,15.0,71,1,19.0,17.2851327993371\n+6,250.0,88.00,3139.0,14.5,71,1,18.0,18.3862192067906\n+4,122.0,86.00,2220.0,14.0,71,1,23.0,24.55551066818654\n+4,116.0,90.00,2123.0,14.0,71,2,28.0,27.310511747713683\n+4,79.0,70.00,2074.0,19.5,71,2,30.0,30.428927901521124\n+4,88.0,76.00,2065.0,14.5,71,2,30.0,28.57037976485042\n+4,71.0,65.00,1773.0,19.0,71,3,31.0,36.65148739336434\n+4,72.0,69.00,1613.0,18.0,71,3,35.0,34.59693136902584\n+4,97.0,60.00,1834.0,19.0,71,2,27.0,29.361661670375785\n+4,91.0,70.00,1955.0,20.5,71,1,26.0,30.16300924555696\n+4,113.0,95.00,2278.0,15.5,72,3,24.0,22.77137953569319\n+4,97.5,80.00,2126.0,17.0,72,1,25.0,26.98790612863828\n+4,97.0,54.00,2254.0,23.5,72,2,23.0,22.765419622541582\n+4,140.0,90.00,2408.0,19.5,72,1,20.0,23.424826127292256\n+4,122.0,86.00,2226.0,16.5,72,1,21.0,23.87254823275849\n+8,350.0,165.0,4274.0,12.0,72,1,13.0,13.339654598544426\n+8,400.0,175.0,4385.0,12.0,72,1,14.0,12.39284979473466\n+8,318.0,150.0,4135.0,13.5,72,1,15.0,13.601349225808425\n+8,351.0,153.0,4129.0,13.0,72,1,14.0,13.673288348913266\n+8,304.0,150.0,3672.0,11.5,72,1,17.0,15.20998076029648\n+8,429.0,208.0,4633.0,11.0,72,1,11.0,12.500280961938373\n+8,350.0,155.0,4502.0,13.5,72,1,13.0,12.492061867780215\n+8,350
.0,160.0,4456.0,13.5,72,1,12.0,12.492061867780215\n+8,400.0,190.0,4422.0,'..b',48.00,2085.0,21.7,80,2,44.3,44.04873495309377\n+4,90.0,48.00,2335.0,23.7,80,2,43.4,43.292619102967045\n+5,121.0,67.00,2950.0,19.9,80,2,36.4,38.58419358793866\n+4,146.0,67.00,3250.0,21.8,80,2,30.0,33.302919568489195\n+4,91.0,67.00,1850.0,13.8,80,3,44.6,46.14809114751762\n+4,85.0,?,1835.0,17.3,80,2,40.9,39.46832996122683\n+4,97.0,67.00,2145.0,18.0,80,3,33.8,39.84775137517216\n+4,89.0,62.00,1845.0,15.3,80,2,29.8,33.85934307396341\n+6,168.0,132.0,2910.0,11.4,80,3,32.7,31.507629280182147\n+3,70.0,100.0,2420.0,12.5,80,3,23.7,34.252160614856145\n+4,122.0,88.00,2500.0,15.1,80,2,35.0,33.332770679285915\n+4,140.0,?,2905.0,14.3,80,1,23.6,26.285247006183862\n+4,107.0,72.00,2290.0,17.0,80,3,32.4,30.426218239560978\n+4,135.0,84.00,2490.0,15.7,81,1,27.2,29.179564839088524\n+4,151.0,84.00,2635.0,16.4,81,1,26.6,27.42833395630787\n+4,156.0,92.00,2620.0,14.4,81,1,25.8,27.767804618782556\n+6,173.0,110.0,2725.0,12.6,81,1,23.5,28.723699283328042\n+4,135.0,84.00,2385.0,12.9,81,1,30.0,30.848247701381503\n+4,79.0,58.00,1755.0,16.9,81,3,39.1,38.61796233140963\n+4,86.0,64.00,1875.0,16.4,81,1,39.0,38.94528588928582\n+4,81.0,60.00,1760.0,16.1,81,3,35.1,37.52298925893553\n+4,97.0,67.00,2065.0,17.8,81,3,32.3,38.59838821632358\n+4,85.0,65.00,1975.0,19.4,81,3,37.0,43.206974575255295\n+4,89.0,62.00,2050.0,17.3,81,3,37.7,36.9578122358848\n+4,91.0,68.00,1985.0,16.0,81,3,34.1,35.72250991701692\n+4,105.0,63.00,2215.0,14.9,81,1,34.7,35.13310280648725\n+4,98.0,65.00,2045.0,16.2,81,1,34.4,38.20355220541596\n+4,98.0,65.00,2380.0,20.7,81,1,29.9,39.547568414776244\n+4,105.0,74.00,2190.0,14.2,81,2,33.0,37.308472648509166\n+4,100.0,?,2320.0,15.8,81,2,34.5,33.30558577268826\n+4,107.0,75.00,2210.0,14.4,81,3,33.7,34.797069863275595\n+4,108.0,75.00,2350.0,16.8,81,3,32.4,33.06724783302362\n+4,119.0,100.0,2615.0,14.8,81,3,32.9,31.56081067650422\n+4,120.0,74.00,2635.0,18.3,81,3,31.6,32.246580792714646\n+4,141.0,80.00,3230.0,20.4,81,2,28.1,2
9.09190981295705\n+6,145.0,76.00,3160.0,19.6,81,2,30.7,29.356329134838397\n+6,168.0,116.0,2900.0,12.6,81,3,25.4,26.199966865873854\n+6,146.0,120.0,2930.0,13.8,81,3,24.2,26.362133705892468\n+6,231.0,110.0,3415.0,15.8,81,1,22.4,21.563875658680523\n+8,350.0,105.0,3725.0,19.0,81,1,26.6,25.045716233576467\n+6,200.0,88.00,3060.0,17.1,81,1,20.2,21.068486942801076\n+6,225.0,85.00,3465.0,16.6,81,1,17.6,20.26876649009557\n+4,112.0,88.00,2605.0,19.6,82,1,28.0,29.84512626786672\n+4,112.0,88.00,2640.0,18.6,82,1,27.0,29.84512626786672\n+4,112.0,88.00,2395.0,18.0,82,1,34.0,33.5742600443853\n+4,112.0,85.00,2575.0,16.2,82,1,31.0,29.023945732632264\n+4,135.0,84.00,2525.0,16.0,82,1,29.0,31.419670627228616\n+4,151.0,90.00,2735.0,18.0,82,1,27.0,27.892469792324\n+4,140.0,92.00,2865.0,16.4,82,1,24.0,27.239415939551016\n+4,151.0,?,3035.0,20.5,82,1,23.0,25.196343356743597\n+4,105.0,74.00,1980.0,15.3,82,2,36.0,35.440784799991576\n+4,91.0,68.00,2025.0,18.2,82,3,37.0,37.32728185546619\n+4,91.0,68.00,1970.0,17.6,82,3,31.0,37.1735402776545\n+4,105.0,63.00,2125.0,14.7,82,1,38.0,37.73595783104049\n+4,98.0,70.00,2125.0,17.3,82,1,36.0,35.01865903545254\n+4,120.0,88.00,2160.0,14.5,82,3,36.0,34.738059281798506\n+4,107.0,75.00,2205.0,14.5,82,3,36.0,36.42428637384083\n+4,108.0,70.00,2245.0,16.9,82,3,34.0,34.25449780118571\n+4,91.0,67.00,1965.0,15.0,82,3,38.0,43.01433472159814\n+4,91.0,67.00,1965.0,15.7,82,3,32.0,41.20814622179627\n+4,91.0,67.00,1995.0,16.2,82,3,38.0,42.36134240378185\n+6,181.0,110.0,2945.0,16.4,82,1,25.0,31.24070954535207\n+6,262.0,85.00,3015.0,17.0,82,1,38.0,31.808149864254364\n+4,156.0,92.00,2585.0,14.5,82,1,26.0,28.31246584775401\n+6,232.0,112.0,2835.0,14.7,82,1,22.0,33.116830704987294\n+4,144.0,96.00,2665.0,13.9,82,3,32.0,31.70802240889664\n+4,135.0,84.00,2370.0,13.0,82,1,36.0,33.09672469004753\n+4,151.0,90.00,2950.0,17.3,82,1,27.0,26.101102668325314\n+4,140.0,86.00,2790.0,15.6,82,1,27.0,25.674883798281442\n+4,97.0,52.00,2130.0,24.6,82,2,44.0,34.812667353374415\n+4,135.0,84.00,2295.
0,11.6,82,1,32.0,32.77687416139507\n+4,120.0,79.00,2625.0,18.6,82,1,28.0,27.852079937244017\n+4,119.0,82.00,2720.0,19.4,82,1,31.0,30.026254460876412\n'

diff -r 000000000000 -r 915447b14520 utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py Wed Dec 11 05:00:00 2024 +0000

[

@@ -0,0 +1,157 @@
+import base64
+import logging
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+def get_html_template():
+    """Return the opening HTML of the report page.
+
+    The returned string holds the <html> and <head> elements (page title
+    "Model Training Report" and an inline stylesheet for the container,
+    headings, tables, plots and tabs) followed by the opening <body> and
+    <div class="container"> tags. Both of those are left open here;
+    get_html_closing() returns the matching closing tags.
+
+    Returns:
+        str: The HTML prefix, to be followed by the report content.
+    """
+    return """
+    <html>
+    <head>
+        <title>Model Training Report</title>
+        <style>
+          body {
+              font-family: Arial, sans-serif;
+              margin: 0;
+              padding: 20px;
+              background-color: #f4f4f4;
+          }
+          .container {
+              max-width: 800px;
+              margin: auto;
+              background: white;
+              padding: 20px;
+              box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+          }
+          h1 {
+              text-align: center;
+              color: #333;
+          }
+          h2 {
+              border-bottom: 2px solid #4CAF50;
+              color: #4CAF50;
+              padding-bottom: 5px;
+          }
+          table {
+              width: 100%;
+              border-collapse: collapse;
+              margin: 20px 0;
+          }
+          table, th, td {
+              border: 1px solid #ddd;
+          }
+          th, td {
+              padding: 8px;
+              text-align: left;
+          }
+          th {
+              background-color: #4CAF50;
+              color: white;
+          }
+          .plot {
+              text-align: center;
+              margin: 20px 0;
+          }
+          .plot img {
+              max-width: 100%;
+              height: auto;
+          }
+          .tabs {
+              display: flex;
+              margin-bottom: 20px;
+              cursor: pointer;
+              justify-content: space-around;
+          }
+          .tab {
+              padding: 10px;
+              background-color: #4CAF50;
+              color: white;
+              border-radius: 5px 5px 0 0;
+              flex-grow: 1;
+              text-align: center;
+              margin: 0 5px;
+          }
+          .tab.active-tab {
+              background-color: #333;
+          }
+          .tab-content {
+              display: none;
+              padding: 20px;
+              border: 1px solid #ddd;
+              border-top: none;
+              background-color: white;
+          }
+          .tab-content.active-content {
+              display: block;
+          }
+      </style>
+    </head>
+    <body>
+    <div class="container">
+    """
+
+
+def get_html_closing():
+    """Return the closing HTML of the report page.
+
+    The returned string closes a <div>, embeds a <script> block and then
+    closes <body> and <html>. The script defines openTab(evt, tabName),
+    which hides every element with class "tab-content", strips
+    " active-tab" from every element with class "tab", shows the element
+    whose id is tabName and appends " active-tab" to the event's current
+    target. It also registers a DOMContentLoaded listener that clicks the
+    first element matching ".tab".
+
+    Returns:
+        str: The HTML suffix, to be placed after the report content.
+    """
+    # NOTE(review): the braces below are doubled as if for str.format or
+    # f-string escaping, but this is a plain string literal, so "{{" and
+    # "}}" are returned exactly as written. Confirm whether any caller
+    # formats this string before changing them.
+    return """
+        </div>
+        <script>
+            function openTab(evt, tabName) {{
+                var i, tabcontent, tablinks;
+                tabcontent = document.getElementsByClassName("tab-content");
+                for (i = 0; i < tabcontent.length; i++) {{
+                    tabcontent[i].style.display = "none";
+                }}
+                tablinks = document.getElementsByClassName("tab");
+                for (i = 0; i < tablinks.length; i++) {{
+                    tablinks[i].className =
+                        tablinks[i].className.replace(" active-tab", "");
+                }}
+                document.getElementById(tabName).style.display = "block";
+                evt.currentTarget.className += " active-tab";
+            }}
+            document.addEventListener("DOMContentLoaded", function() {{
+                document.querySelector(".tab").click();
+            }});
+        </script>
+    </body>
+    </html>
+    """
+
+
+def customize_figure_layout(fig, margin_dict=None):
+    """
+    Apply a margin setting to a figure's layout and hand the figure back.
+
+    Parameters:
+        fig (plotly.graph_objects.Figure): Figure whose layout is updated
+            in place through ``fig.update_layout``.
+        margin_dict (dict, optional): Margin sizes keyed by side, e.g.
+            {'l': 10, 'r': 10, 't': 10, 'b': 10}. When None, a margin of
+            40 on every side is used.
+
+    Returns:
+        plotly.graph_objects.Figure: The same ``fig`` object that was
+            passed in.
+    """
+    # Only a missing (None) argument triggers the default; any dict the
+    # caller supplies is forwarded untouched.
+    margins = (
+        {'l': 40, 'r': 40, 't': 40, 'b': 40}
+        if margin_dict is None
+        else margin_dict
+    )
+    fig.update_layout(margin=margins)
+    return fig
+
+
+def add_plot_to_html(fig, include_plotlyjs=True):
+    """
+    Render a figure as an HTML fragment after tightening its margins.
+
+    Parameters:
+        fig: Figure object; it is passed through customize_figure_layout
+            and then rendered with its ``to_html`` method.
+        include_plotlyjs (bool, optional): When truthy, "cdn" is passed as
+            ``include_plotlyjs`` to ``to_html``; otherwise False is passed.
+
+    Returns:
+        The value returned by ``fig.to_html`` (called with
+        ``full_html=False`` and ``default_height=350``).
+    """
+    # Top and bottom margins are larger here than the helper's default.
+    report_margins = {'l': 40, 'r': 40, 't': 60, 'b': 60}
+    fig = customize_figure_layout(fig, margin_dict=report_margins)
+
+    if include_plotlyjs:
+        plotlyjs_source = "cdn"
+    else:
+        plotlyjs_source = False
+
+    return fig.to_html(
+        full_html=False,
+        default_height=350,
+        include_plotlyjs=plotlyjs_source,
+    )
+
+
+def add_hr_to_html():
+    """Return the HTML tag for a horizontal rule."""
+    horizontal_rule = "<hr>"
+    return horizontal_rule
+
+
+def encode_image_to_base64(image_path):
+    """
+    Read a file in binary mode and return its contents base64-encoded.
+
+    Parameters:
+        image_path: Path of the file to read; passed directly to ``open``.
+
+    Returns:
+        str: The base64 encoding of the file's bytes, decoded as UTF-8
+            text.
+    """
+    with open(image_path, "rb") as handle:
+        raw_bytes = handle.read()
+    encoded = base64.b64encode(raw_bytes)
+    return encoded.decode("utf-8")