changeset 1:0bef7ea84b7f draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 973836fb40ecb9c0ac26f675d12b20fc8e5f51f4
author bgruening
date Mon, 14 Apr 2025 09:56:46 +0000
parents bd808d1c4e0c
children
files fetch_cbioportal_data.py flexynesis.xml macros.xml
diffstat 3 files changed, 340 insertions(+), 117 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fetch_cbioportal_data.py	Mon Apr 14 09:56:46 2025 +0000
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+import argparse
+import os
+
+from flexynesis.utils import CBioPortalData
+
+
+def main():
+    """Fetch a cBioPortal study through flexynesis and write train/test CSV files per data type.
+
+    Raises ValueError on an out-of-range --split_ratio, missing 'clin', unknown data types or inconsistent --mapped_files.
+    """
+    parser = argparse.ArgumentParser(description="Fetch and prepare cBioPortal data for Flexynesis.")
+    parser.add_argument("--study_id", required=True, help="cBioPortal study ID (e.g., 'brca_tcga')")
+    parser.add_argument("--data_types", required=True, help="Comma-separated list of data types (e.g., 'clin,mut,omics')")
+    parser.add_argument("--mapped_files", default=None, help="Comma-separated list of .txt files to map to data_types (optional)")
+    parser.add_argument("--split_ratio", type=float, default=0.7, help="Training/test split ratio (0.0 to 1.0)")
+    parser.add_argument("--output_dir", required=True, help="Output directory for datasets")
+    args = parser.parse_args()
+    if not 0.0 <= args.split_ratio <= 1.0:
+        raise ValueError(f"Split ratio must be between 0.0 and 1.0, got {args.split_ratio}.")
+
+    file_mapping = {
+        "clin": "data_clinical_patient.txt",  # can be any with 'clinical' in file name
+        "mut": "data_mutations.txt",  # any with 'mutations' in file name
+        "omics": "data_cna.txt",
+        "other": None  # no default file name here; presumably needs --mapped_files (TODO confirm)
+    }
+    # Strip blanks so "clin, mut" is accepted, and reject bad input before anything is fetched.
+    data_types = [dt.strip() for dt in args.data_types.split(",")]
+    if "clin" not in data_types:
+        raise ValueError("Clinical data ('clin') is required for splitting the dataset.")
+    invalid_types = set(data_types) - set(file_mapping.keys())
+    if invalid_types:
+        raise ValueError(f"Invalid data types: {invalid_types}. Supported types: {list(file_mapping.keys())}")
+
+    if args.mapped_files:
+        mapped_files = [mf.strip() for mf in args.mapped_files.split(",")]
+        if len(mapped_files) != len(data_types):
+            raise ValueError(f"Number of mapped files ({len(mapped_files)}) must match number of data types ({len(data_types)}).")
+        for mf in mapped_files:
+            if not mf.endswith(".txt"):
+                raise ValueError(f"Mapped file '{mf}' must end with '.txt'.")
+        files_to_fetch = dict(zip(data_types, mapped_files))
+    else:
+        files_to_fetch = {dt: file_mapping[dt] for dt in data_types}
+
+    cbioportal = CBioPortalData(study_id=args.study_id)
+    cbioportal.get_cbioportal_data(study_id=args.study_id, files=files_to_fetch)
+    dataset = cbioportal.split_data(ratio=args.split_ratio)
+
+    os.makedirs(args.output_dir, exist_ok=True)
+    for split in ("train", "test"):
+        for data_type in data_types:
+            if data_type in dataset[split]:
+                dataset[split][data_type].to_csv(os.path.join(args.output_dir, f"{data_type}_{split}.csv"), index=True)
+
+
+if __name__ == "__main__":
+    main()
--- a/flexynesis.xml	Mon Aug 12 17:58:14 2024 +0000
+++ b/flexynesis.xml	Mon Apr 14 09:56:46 2025 +0000
@@ -71,6 +71,7 @@
                     #end if
                     --fusion_type $fusion_type
                     --hpo_iter $hpo_iter
+                    --val_size $val_size
                     --finetuning_samples $finetuning_samples
                     --variance_threshold $variance_threshold
                     --correlation_threshold $correlation_threshold
@@ -84,6 +85,7 @@
                     $use_loss_weighting
                     $use_cv
                     $evaluate_baseline_performance
+                    --feature_importance_method $feature_importance_method
                     $disable_marker_finding
                     \${GALAXY_FLEXYNESIS_EXTRA_ARGUMENTS}
     ]]></command>
@@ -110,6 +112,7 @@
                         <option value="RandomForest">RandomForest</option>
                         <option value="SVM">SVM</option>
                         <option value="RandomSurvivalForest">RandomSurvivalForest</option>
+                        <option value="XGBoost">XGBoost</option>
                     </param>
                     <when value="DirectPred"/>
                     <when value="GNN">
@@ -133,6 +136,7 @@
                     <when value="RandomForest"/>
                     <when value="SVM"/>
                     <when value="RandomSurvivalForest"/>
+                    <when value="XGBoost"/>
                 </conditional>
                 <param argument="--target_variables" type="text" label="Target variables" help="Which variables in 'clin.csv' to use for predictions, comma-separated if multiple.">
                     <sanitizer invalid_char="">
@@ -189,22 +193,26 @@
     <tests>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="foo"/>
-            </repeat>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="model_class" value="DirectPred"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="DirectPred"/>
+                </conditional>
                 <param name="target_variables" value="Erlotinib"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -216,10 +224,18 @@
                         <has_n_lines n="50"/>
                     </assert_contents>
                 </element>
-                <element name="job.feature_importance">
+                <element name="job.feature_importance.GradientShap">
                     <assert_contents>
                         <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
                         <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="GradientShap"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
+                    <assert_contents>
+                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
+                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_logs.bar">
@@ -249,17 +265,21 @@
         </test>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="model_class" value="DirectPred"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <conditional name="model_class">
+                    <param name="model_class" value="DirectPred"/>
+                </conditional>
                 <param name="target_variables" value="Erlotinib"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -271,10 +291,18 @@
                         <has_n_lines n="50"/>
                     </assert_contents>
                 </element>
-                <element name="job.feature_importance">
+                <element name="job.feature_importance.GradientShap">
                     <assert_contents>
                         <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
                         <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="GradientShap"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
+                    <assert_contents>
+                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
+                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_logs.bar">
@@ -299,22 +327,26 @@
         </test>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="foo"/>
-            </repeat>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="model_class" value="DirectPred"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="DirectPred"/>
+                </conditional>
                 <param name="target_variables" value="Irinotecan"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -326,10 +358,18 @@
                         <has_n_lines n="50"/>
                     </assert_contents>
                 </element>
-                <element name="job.feature_importance">
+                <element name="job.feature_importance.GradientShap">
                     <assert_contents>
                         <has_text_matching expression="Irinotecan,0,,bar,A2M,"/>
                         <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="GradientShap"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
+                    <assert_contents>
+                        <has_text_matching expression="Irinotecan,0,,bar,A2M,"/>
+                        <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_logs.bar">
@@ -337,7 +377,7 @@
                         <has_n_lines n="25"/>
                     </assert_contents>
                 </element>
-                <element name="job.feature_logs.bar">
+                <element name="job.feature_logs.omics_foo">
                     <assert_contents>
                         <has_n_lines n="25"/>
                     </assert_contents>
@@ -359,21 +399,23 @@
         </test>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="foo"/>
-            </repeat>
             <conditional name="training_type">
                 <param name="model" value="us_train"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
                 <param name="model_class" value="supervised_vae"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -399,23 +441,25 @@
         </test>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
-            <param name="layer_main" value="input"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="foo"/>
-                <param name="layer" value="output"/>
-            </repeat>
             <conditional name="training_type">
                 <param name="model" value="cm_train"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <param name="layer_main" value="input"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                    <param name="layer" value="output"/>
+                </repeat>
                 <param name="model_class" value="CrossModalPred"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -451,25 +495,29 @@
         </test>
         <test>
             <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="bar"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="foo"/>
-            </repeat>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="model_class" value="GNN"/>
-                <param name="gnn_conv_type" value="GC"/>
-                <param name="string_organism" value="9606"/>
-                <param name="string_node_name" value="gene_name"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="GNN"/>
+                    <param name="gnn_conv_type" value="GC"/>
+                    <param name="string_organism" value="9606"/>
+                    <param name="string_node_name" value="gene_name"/>
+                </conditional>
                 <param name="target_variables" value="Erlotinib"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
             </conditional>
-            <param name="hpo_iter" value="1"/>
             <output_collection name="results" type="list">
                 <element name="job.embeddings_test">
                     <assert_contents>
@@ -481,7 +529,155 @@
                         <has_n_lines n="50"/>
                     </assert_contents>
                 </element>
-                <element name="job.feature_importance">
+                <element name="job.feature_importance.GradientShap">
+                    <assert_contents>
+                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
+                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="GradientShap"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
+                    <assert_contents>
+                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
+                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="IntegratedGradients"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_logs.bar">
+                    <assert_contents>
+                        <has_n_lines n="25"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_logs.omics_foo">
+                    <assert_contents>
+                        <has_n_lines n="25"/>
+                    </assert_contents>
+                </element>
+                <element name="job.predicted_labels">
+                    <assert_contents>
+                        <has_text_matching expression="source_dataset:A-704,Erlotinib,"/>
+                        <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/>
+                    </assert_contents>
+                </element>
+                <element name="job.stats">
+                    <assert_contents>
+                        <has_text_matching expression="GNN,Erlotinib,numerical,mse,"/>
+                        <has_text_matching expression="GNN,Erlotinib,numerical,r2,"/>
+                        <has_text_matching expression="GNN,Erlotinib,numerical,pearson_corr,"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="non_commercial_use" value="True"/>
+            <conditional name="training_type">
+                <param name="model" value="us_train"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="b ar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="f oo"/>
+                </repeat>
+                <param name="model_class" value="supervised_vae"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
+            </conditional>
+            <output_collection name="results" type="list">
+                <element name="job.embeddings_test">
+                    <assert_contents>
+                        <has_n_lines n="50"/>
+                    </assert_contents>
+                </element>
+                <element name="job.embeddings_train">
+                    <assert_contents>
+                        <has_n_lines n="50"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_logs.b_ar">
+                    <assert_contents>
+                        <has_n_lines n="25"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_logs.omics_f_oo">
+                    <assert_contents>
+                        <has_n_lines n="25"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="non_commercial_use" value="True"/>
+            <conditional name="training_type">
+                <param name="model" value="s_train"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="XGBoost"/>
+                </conditional>
+                <param name="target_variables" value="Erlotinib"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
+            </conditional>
+            <output_collection name="results" type="list">
+                <element name="job.stats">
+                    <assert_contents>
+                        <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,mse,"/>
+                        <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,r2,"/>
+                        <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,pearson_corr,"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="non_commercial_use" value="True"/>
+            <conditional name="training_type">
+                <param name="model" value="s_train"/>
+                <param name="train_clin" value="train/clin" ftype="csv"/>
+                <param name="test_clin" value="test/clin" ftype="csv"/>
+                <param name="train_omics_main" value="train/gex" ftype="csv"/>
+                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="csv"/>
+                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="DirectPred"/>
+                </conditional>
+                <param name="target_variables" value="Erlotinib"/>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                    <param name="feature_importance_method" value="IntegratedGradients"/>
+                    <param name="val_size" value="0.2"/>
+                </section>
+            </conditional>
+            <output_collection name="results" type="list">
+                <element name="job.embeddings_test">
+                    <assert_contents>
+                        <has_n_lines n="50"/>
+                    </assert_contents>
+                </element>
+                <element name="job.embeddings_train">
+                    <assert_contents>
+                        <has_n_lines n="50"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
                     <assert_contents>
                         <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
                         <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
@@ -512,46 +708,6 @@
                 </element>
             </output_collection>
         </test>
-        <test>
-            <param name="non_commercial_use" value="True"/>
-            <param name="train_clin" value="train/clin" ftype="csv"/>
-            <param name="test_clin" value="test/clin" ftype="csv"/>
-            <param name="train_omics_main" value="train/gex" ftype="csv"/>
-            <param name="test_omics_main" value="test/gex" ftype="csv"/>
-            <param name="assay_main" value="b ar"/>
-            <repeat name="omics">
-                <param name="train_omics" value="train/cnv" ftype="csv"/>
-                <param name="test_omics" value="test/cnv" ftype="csv"/>
-                <param name="assay" value="f oo"/>
-            </repeat>
-            <conditional name="training_type">
-                <param name="model" value="us_train"/>
-                <param name="model_class" value="supervised_vae"/>
-            </conditional>
-            <param name="hpo_iter" value="1"/>
-            <output_collection name="results" type="list">
-                <element name="job.embeddings_test">
-                    <assert_contents>
-                        <has_n_lines n="50"/>
-                    </assert_contents>
-                </element>
-                <element name="job.embeddings_train">
-                    <assert_contents>
-                        <has_n_lines n="50"/>
-                    </assert_contents>
-                </element>
-                <element name="job.feature_logs.b_ar">
-                    <assert_contents>
-                        <has_n_lines n="25"/>
-                    </assert_contents>
-                </element>
-                <element name="job.feature_logs.omics_f_oo">
-                    <assert_contents>
-                        <has_n_lines n="25"/>
-                    </assert_contents>
-                </element>
-            </output_collection>
-        </test>
     </tests>
     <help>
 .. class:: warningmark
--- a/macros.xml	Mon Aug 12 17:58:14 2024 +0000
+++ b/macros.xml	Mon Apr 14 09:56:46 2025 +0000
@@ -1,6 +1,6 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.2.10</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@TOOL_VERSION@">0.2.17</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <token name="@PROFILE@">24.1</token>
     <xml name="requirements">
         <requirements>
@@ -65,10 +65,16 @@
             <param argument="--log_transform" type="boolean" truevalue="--log_transform True" falsevalue="" checked="false" label="Whether to apply log-transformation to input data matrices" />
             <param argument="--early_stop_patience" type="integer" min="-1" value="10" label="How many epochs to wait when no improvements in validation loss are observed." help="Set to -1 to disable early stopping." />
             <param argument="--hpo_iter" type="integer" min="1" value="100" label="Number of iterations for hyperparameter optimisation." />
+            <param argument="--val_size" type="float" min="0.0" max="1" value="0.2" label="Proportion of training data to be used as validation split"/>
             <param argument="--hpo_patience" type="integer" min="0" value="10" label="How many hyperparameter optimisation iterations to wait for when no improvements are observed." help="Set to 0 to disable early stopping." />
             <param argument="--use_cv" type="boolean" truevalue="--use_cv" falsevalue="" checked="false" label="Cross validation" help="If set, a 5-fold cross-validation training will be done. Otherwise, a single training on 80 percent of the dataset is done. " />
             <param argument="--use_loss_weighting" type="boolean" truevalue="--use_loss_weighting True" falsevalue="" checked="true" label="Whether to apply loss-balancing using uncertainty weights method." />
             <param argument="--evaluate_baseline_performance" type="boolean" truevalue="--evaluate_baseline_performance" falsevalue="" checked="false" label="Enable modeling also with Random Forest + SVMs to see the performance of off-the-shelf tools on the same dataset." />
+            <param argument="--feature_importance_method" type="select" label="which method(s) to use to compute feature importance scores.">
+                <option value="Both" selected="true">Both</option>
+                <option value="IntegratedGradients">IntegratedGradients</option>
+                <option value="GradientShap">GradientShap</option>
+            </param>
             <param argument="--disable_marker_finding" type="boolean" truevalue="--disable_marker_finding" falsevalue="" checked="false" label="Disable marker discovery after model training." />
         </section>
     </xml>