pycaret_compare: pycaret_train.xml comparison

comparison pycaret_train.xml @ 0:915447b14520 draft

planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit d79b0f722b7d09505a526d1a4332f87e548a3df1

author	goeckslab
date	Wed, 11 Dec 2024 05:00:00 +0000
parents
children	f6def1b90150

comparison

equal deleted inserted replaced

--1:000000000000
+:915447b14520
+<tool id="pycaret_compare" name="PyCaret Model Comparison" version="@VERSION@" profile="@PROFILE@">
+<description>compares different machine learning models on a dataset using PyCaret. Do feature analyses using Random Forest and LightGBM. </description>
+<macros>
+<import>pycaret_macros.xml</import>
+</macros>
+<expand macro="python_requirements" />
+<command>
+    <![CDATA[
+        ## Assemble the pycaret_train.py invocation, one option per line.
+        ## Parameters declared inside a <conditional> are addressed through the
+        ## conditional's name (model_selection, advanced_settings,
+        ## advanced_settings.cross_validation) rather than by bare name, and
+        ## file paths are single-quoted so unusual characters survive the shell.
+        python '$__tool_directory__/pycaret_train.py'
+            --input_file '$input_file'
+            --target_col '$target_feature'
+            --output_dir "`pwd`"
+            --random_seed '$random_seed'
+
+        ## Restrict the comparison to the selected models, if any were picked
+        ## for the chosen task; otherwise no model list is passed.
+        #if str($model_selection.model_type) == "classification"
+            #if $model_selection.classification_models
+                --models '$model_selection.classification_models'
+            #end if
+        #end if
+        #if str($model_selection.model_type) == "regression"
+            #if $model_selection.regression_models
+                --models '$model_selection.regression_models'
+            #end if
+        #end if
+
+        ## Experiment overrides are only forwarded when the user opted in.
+        #if str($advanced_settings.customize_defaults) == "true"
+            #if $advanced_settings.train_size
+                --train_size '$advanced_settings.train_size'
+            #end if
+            #if $advanced_settings.normalize
+                --normalize
+            #end if
+            #if $advanced_settings.feature_selection
+                --feature_selection
+            #end if
+            ## cross_validation_folds is declared only in the "true" branch of
+            ## the cross_validation conditional, so it is read inside this check.
+            #if str($advanced_settings.cross_validation.enable_cross_validation) == "true"
+                --cross_validation
+                #if $advanced_settings.cross_validation.cross_validation_folds
+                    --cross_validation_folds '$advanced_settings.cross_validation.cross_validation_folds'
+                #end if
+            #end if
+            #if $advanced_settings.remove_outliers
+                --remove_outliers
+            #end if
+            #if $advanced_settings.remove_multicollinearity
+                --remove_multicollinearity
+            #end if
+            #if $advanced_settings.polynomial_features
+                --polynomial_features
+            #end if
+            #if $advanced_settings.fix_imbalance
+                --fix_imbalance
+            #end if
+        #end if
+
+        ## The test dataset is optional; pass it only when one was supplied.
+        #if $test_file
+            --test_file '$test_file'
+        #end if
+        --model_type '$model_selection.model_type'
+    ]]>
+</command>
+<inputs>
+    <!-- Training dataset; also the data_ref for the target column selector below. -->
+    <param name="input_file" type="data" format="csv,tabular" label="Train Dataset (CSV or TSV)" />
+    <!-- Optional separate test dataset. -->
+    <param name="test_file" type="data" format="csv,tabular" optional="true"
+           label="Test Dataset (CSV or TSV)"
+           help="If a test set is not provided, the selected training set will be split into training, validation, and test sets. If a test set is provided, the training set will only be split into training and validation sets. BTW, cross-validation is always applied by default." />
+    <!-- Single column of input_file to predict, offered by header name. -->
+    <param name="target_feature" multiple="false" type="data_column" use_header_names="true"
+           data_ref="input_file" label="Select the target column:" />
+
+    <!-- Task type; each branch exposes its own optional list of model IDs to compare. -->
+    <conditional name="model_selection">
+        <param name="model_type" type="select" label="Task">
+            <option value="classification">classification</option>
+            <option value="regression">regression</option>
+        </param>
+        <when value="classification">
+            <param name="classification_models" type="select" multiple="true"
+                   label="Only Select Classification Models if you don't want to compare all models">
+                <option value="lr">Logistic Regression</option>
+                <option value="knn">K Neighbors Classifier</option>
+                <option value="nb">Naive Bayes</option>
+                <option value="dt">Decision Tree Classifier</option>
+                <option value="svm">SVM - Linear Kernel</option>
+                <option value="rbfsvm">SVM - Radial Kernel</option>
+                <option value="gpc">Gaussian Process Classifier</option>
+                <option value="mlp">MLP Classifier</option>
+                <option value="ridge">Ridge Classifier</option>
+                <option value="rf">Random Forest Classifier</option>
+                <option value="qda">Quadratic Discriminant Analysis</option>
+                <option value="ada">Ada Boost Classifier</option>
+                <option value="gbc">Gradient Boosting Classifier</option>
+                <option value="lda">Linear Discriminant Analysis</option>
+                <option value="et">Extra Trees Classifier</option>
+                <option value="xgboost">Extreme Gradient Boosting</option>
+                <option value="lightgbm">Light Gradient Boosting Machine</option>
+                <option value="catboost">CatBoost Classifier</option>
+            </param>
+        </when>
+        <when value="regression">
+            <param name="regression_models" type="select" multiple="true"
+                   label="Only Select Regression Models if you don't want to compare all models">
+                <option value="lr">Linear Regression</option>
+                <option value="lasso">Lasso Regression</option>
+                <option value="ridge">Ridge Regression</option>
+                <option value="en">Elastic Net</option>
+                <option value="lar">Least Angle Regression</option>
+                <option value="llar">Lasso Least Angle Regression</option>
+                <option value="omp">Orthogonal Matching Pursuit</option>
+                <option value="br">Bayesian Ridge</option>
+                <option value="ard">Automatic Relevance Determination</option>
+                <option value="par">Passive Aggressive Regressor</option>
+                <option value="ransac">Random Sample Consensus</option>
+                <option value="tr">TheilSen Regressor</option>
+                <option value="huber">Huber Regressor</option>
+                <option value="kr">Kernel Ridge</option>
+                <option value="svm">Support Vector Regression</option>
+                <option value="knn">K Neighbors Regressor</option>
+                <option value="dt">Decision Tree Regressor</option>
+                <option value="rf">Random Forest Regressor</option>
+                <option value="et">Extra Trees Regressor</option>
+                <option value="ada">AdaBoost Regressor</option>
+                <option value="gbr">Gradient Boosting Regressor</option>
+                <option value="mlp">MLP Regressor</option>
+                <option value="xgboost">Extreme Gradient Boosting</option>
+                <option value="lightgbm">Light Gradient Boosting Machine</option>
+                <option value="catboost">CatBoost Regressor</option>
+            </param>
+        </when>
+    </conditional>
+
+    <param name="random_seed" type="integer" value="42" label="Random Seed" help="Random seed for reproducibility." />
+
+    <!-- Optional experiment overrides; the extra parameters only appear when "Yes" is chosen. -->
+    <conditional name="advanced_settings">
+        <param name="customize_defaults" type="select" label="Customize Default Settings?"
+               help="Select yes if you want to customize the default settings of the experiment.">
+            <option value="false" selected="true">No</option>
+            <option value="true">Yes</option>
+        </param>
+        <when value="true">
+            <param name="train_size" type="float" value="0.7" min="0.1" max="0.9" label="Train Size"
+                   help="Proportion of the dataset to include in the train split." />
+            <param name="normalize" type="boolean" truevalue="True" falsevalue="False" checked="false"
+                   label="Normalize Data" help="Whether to normalize data before training." />
+            <param name="feature_selection" type="boolean" truevalue="True" falsevalue="False" checked="false"
+                   label="Feature Selection" help="Whether to perform feature selection." />
+            <!-- The fold count is only offered while cross-validation is enabled. -->
+            <conditional name="cross_validation">
+                <param name="enable_cross_validation" type="select" label="Enable Cross Validation?"
+                       help="Select whether to enable cross-validation. Default: Yes" >
+                    <option value="false" >No</option>
+                    <option value="true" selected="true">Yes</option>
+                </param>
+                <when value="true">
+                    <param name="cross_validation_folds" type="integer" value="10" min="2" max="20"
+                           label="Cross Validation Folds"
+                           help="Number of folds to use for cross-validation. Default: 10" />
+                </when>
+                <when value="false">
+                    <!-- No additional parameters to show if the user selects 'No' -->
+                </when>
+            </conditional>
+            <param name="remove_outliers" type="boolean" truevalue="True" falsevalue="False" checked="false"
+                   label="Remove Outliers"
+                   help="Whether to remove outliers from the dataset before training. Default: False" />
+            <param name="remove_multicollinearity" type="boolean" truevalue="True" falsevalue="False" checked="false"
+                   label="Remove Multicollinearity"
+                   help="Whether to remove multicollinear features before training. Default: False" />
+            <param name="polynomial_features" type="boolean" truevalue="True" falsevalue="False" checked="false"
+                   label="Polynomial Features"
+                   help="Whether to create polynomial features before training. Default: False" />
+            <param name="fix_imbalance" type="boolean" truevalue="True" falsevalue="False" checked="false"
+                   label="Fix Imbalance"
+                   help="ONLY for classification! Whether to use SMOTE or similar methods to fix imbalance in the dataset. Default: False" />
+        </when>
+        <when value="false">
+            <!-- No additional parameters to show if the user selects 'No' -->
+        </when>
+    </conditional>
+</inputs>
+<outputs>
+    <!-- Best model, collected from pycaret_model.h5 in the job working directory. -->
+    <data name="model" format="h5" from_work_dir="pycaret_model.h5" label="${tool.name} best model on ${on_string}" />
+    <!-- HTML comparison report, collected from comparison_result.html. -->
+    <data name="comparison_result" format="html" from_work_dir="comparison_result.html" label="${tool.name} Comparison result on ${on_string}"/>
+    <!-- CSV describing the best model, collected from best_model.csv; hidden in the history. -->
+    <data name="best_model_csv" format="csv" from_work_dir="best_model.csv" label="${tool.name} The parameters of the best model on ${on_string}" hidden="true" />
+</outputs>
+<tests>
+    <!-- Case 1: classification on pcr.tsv with customized settings (train size 0.8,
+         normalization, feature selection, 5-fold cross-validation, outlier and
+         multicollinearity removal). Model and report are compared with sim_size. -->
+    <test>
+        <param name="input_file" value="pcr.tsv"/>
+        <param name="target_feature" value="11"/>
+        <param name="model_type" value="classification"/>
+        <param name="random_seed" value="42"/>
+        <param name="customize_defaults" value="true"/>
+        <param name="train_size" value="0.8"/>
+        <param name="normalize" value="true"/>
+        <param name="feature_selection" value="true"/>
+        <param name="enable_cross_validation" value="true"/>
+        <param name="cross_validation_folds" value="5"/>
+        <param name="remove_outliers" value="true"/>
+        <param name="remove_multicollinearity" value="true"/>
+        <output name="model" file="expected_model_classification_customized.h5" compare="sim_size"/>
+        <output name="comparison_result" file="expected_comparison_result_classification_customized.html" compare="sim_size" />
+        <output name="best_model_csv" value="expected_best_model_classification_customized.csv" />
+    </test>
+
+    <!-- Case 2: classification on pcr.tsv without setting any advanced option. -->
+    <test>
+        <param name="input_file" value="pcr.tsv"/>
+        <param name="target_feature" value="11"/>
+        <param name="model_type" value="classification"/>
+        <param name="random_seed" value="42"/>
+        <output name="model" file="expected_model_classification.h5" compare="sim_size"/>
+        <output name="comparison_result" file="expected_comparison_result_classification.html" compare="sim_size" />
+        <output name="best_model_csv" value="expected_best_model_classification.csv" />
+    </test>
+
+    <!-- Case 3: regression on auto-mpg.tsv, predicting column 1, without setting any advanced option. -->
+    <test>
+        <param name="input_file" value="auto-mpg.tsv"/>
+        <param name="target_feature" value="1"/>
+        <param name="model_type" value="regression"/>
+        <param name="random_seed" value="42"/>
+        <output name="model" file="expected_model_regression.h5" compare="sim_size" />
+        <output name="comparison_result" file="expected_comparison_result_regression.html" compare="sim_size" />
+        <output name="best_model_csv" value="expected_best_model_regression.csv" />
+    </test>
+</tests>
+<help>
+This tool uses PyCaret to train and evaluate machine learning models.
+It compares different models on a dataset and provides the best model based on the performance metrics.
+
+**Outputs**
+
+- **Model**: The best model trained on the dataset in h5 format.
+- **Comparison Result**: The comparison result of different models in html format.
+  It contains the performance metrics of different models, plots of the best model
+  on the testing set (or part of the training set if a separate test set is not uploaded), and feature analysis plots.
+</help>
+<expand macro="macro_citations" />
+</tool>

Mercurial > repos > goeckslab > pycaret_compare

comparison pycaret_train.xml @ 0:915447b14520 draft