Mercurial > repos > goeckslab > multimodal_learner
view multimodal_learner.xml @ 0:375c36923da1 draft default tip
planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
| author | goeckslab |
|---|---|
| date | Tue, 09 Dec 2025 23:49:47 +0000 |
| parents | |
| children |
line wrap: on
line source
<tool id="multimodal_learner" name="Multimodal Learner" version="0.1.0" profile="22.01">
    <!--
        Galaxy wrapper around multimodal_learner.py.
        Inputs: a training CSV/TSV, an optional separate test CSV/TSV and optional
        ZIP archives of images. Outputs: an HTML report, a YAML training config and
        a JSON metrics file.
    -->
    <description>Train and evaluate an AutoGluon Multimodal model (tabular + image + text)</description>
    <requirements>
        <container type="docker">quay.io/goeckslab/multimodal-learner:1.4.0</container>
    </requirements>
    <!-- Python modules shipped next to this wrapper and needed at run time. -->
    <required_files>
        <include path="multimodal_learner.py"/>
        <include path="utils.py"/>
        <include path="split_logic.py"/>
        <include path="training_pipeline.py"/>
        <include path="test_pipeline.py"/>
        <include path="metrics_logic.py"/>
        <include path="plot_logic.py"/>
        <include path="report_utils.py"/>
        <include path="feature_help_modal.py"/>
    </required_files>
    <!-- Exit code 137 is listed first so it is reported as out-of-memory rather than as a generic failure. -->
    <stdio>
        <exit_code range="137" level="fatal_oom" description="Out of Memory"/>
        <exit_code range="1:" level="fatal" description="Tool failed — see Tool Standard Error"/>
    </stdio>
    <command detect_errors="exit_code"><![CDATA[
        ## Collect every image archive from the repeat into one quoted, space-separated string.
        #set $image_zip_paths = []
        #if $use_images_conditional.use_images == "yes"
            #for $zip_file in $use_images_conditional.images_zip_repeat
                #set $image_zip_paths = $image_zip_paths + [$zip_file.images_zip]
            #end for
        #end if
        #if len($image_zip_paths) > 0
            #set $images_zip_cli = " ".join(["'%s'" % z for z in $image_zip_paths])
        #else
            #set $images_zip_cli = None
        #end if

        ## The test dataset parameter is optional, so only use it when the toggle is on
        ## AND a dataset was really selected; otherwise the symlink below would point at 'None'.
        #set $use_test_set = False
        #if $test_dataset_conditional.has_test_dataset == "yes"
            #if $test_dataset_conditional.input_test
                #set $use_test_set = True
            #end if
        #end if

        set -e;
        ln -sf '$input_csv' 'train_input.csv';
        #if $use_test_set
            ln -sf '$test_dataset_conditional.input_test' 'test_input.csv';
        #end if

        python '$__tool_directory__/multimodal_learner.py'
            --input_csv_train 'train_input.csv'
            #if $use_test_set
                --input_csv_test 'test_input.csv'
            #end if
            --target_column '$target_column'

            #if $use_images_conditional.use_images == "yes"
                #if $images_zip_cli
                    --images_zip $images_zip_cli
                #end if
                --missing_image_strategy '$use_images_conditional.missing_image_strategy'
                #if $use_images_conditional.backbone_image
                    --backbone_image '$use_images_conditional.backbone_image'
                #end if
            #end if

            #if $backbone_text not in ("", None)
                --backbone_text '$backbone_text'
            #end if

            --preset '$preset'
            --eval_metric '$eval_metric'
            --random_seed '$random_seed'
            #if $time_limit
                --time_limit $time_limit
            #end if
            #if $deterministic == "true"
                --deterministic
            #end if

            #if $customize_defaults_conditional.customize_defaults == "yes"
                #if $customize_defaults_conditional.validation_size not in ("", None)
                    --validation_size $customize_defaults_conditional.validation_size
                #end if
                #if $customize_defaults_conditional.split_probabilities and str($customize_defaults_conditional.split_probabilities).strip()
                    ## Accept comma- or whitespace-separated values and normalise each to a float literal.
                    #set $split_probabilities_cli = " ".join([str(float(x)) for x in str($customize_defaults_conditional.split_probabilities).replace(",", " ").split() if x.strip()])
                    --split_probabilities $split_probabilities_cli
                #end if
                #if $customize_defaults_conditional.cross_validation == "true"
                    --cross_validation true
                    --num_folds $customize_defaults_conditional.num_folds
                #end if
                #if $customize_defaults_conditional.epochs
                    --epochs $customize_defaults_conditional.epochs
                #end if
                #if $customize_defaults_conditional.learning_rate
                    --learning_rate $customize_defaults_conditional.learning_rate
                #end if
                #if $customize_defaults_conditional.batch_size
                    --batch_size $customize_defaults_conditional.batch_size
                #end if
                ## Compare against the empty string instead of testing truthiness:
                ## 0 is a permitted threshold (min="0") and must still be forwarded.
                #if str($customize_defaults_conditional.threshold) != ""
                    --threshold $customize_defaults_conditional.threshold
                #end if
                #if $customize_defaults_conditional.hyperparameters
                    --hyperparameters '$customize_defaults_conditional.hyperparameters'
                #end if
            #end if

            --output_json '$output_json'
            --output_html '$output_html'
            --output_config '$output_config'
    ]]></command>
    <inputs>
        <param name="input_csv" type="data" format="csv,tsv" label="Training dataset (CSV/TSV)" help="Must contain the target column and optional image paths"/>
        <param name="target_column" type="data_column" data_ref="input_csv" numerical="false" use_header_names="true" label="Target / Label column"/>

        <conditional name="test_dataset_conditional">
            <param name="has_test_dataset" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Provide separate test dataset?"/>
            <when value="yes">
                <!-- Optional: when left empty the command behaves as if no test dataset was requested. -->
                <param name="input_test" type="data" format="csv,tsv" optional="true" label="Test dataset (CSV/TSV)"/>
            </when>
            <when value="no"/>
        </conditional>

        <param name="backbone_text" type="select" label="Text backbone" optional="true">
            <option value="microsoft/deberta-v3-base" selected="true">microsoft/deberta-v3-base</option>
            <option value="microsoft/deberta-v3-small">microsoft/deberta-v3-small</option>
            <option value="google/electra-base-discriminator">google/electra-base-discriminator</option>
            <option value="google/electra-small-discriminator">google/electra-small-discriminator</option>
            <option value="roberta-base">roberta-base</option>
            <option value="bert-base-uncased">bert-base-uncased</option>
            <option value="distilroberta-base">distilroberta-base</option>
            <option value="albert-base-v2">albert-base-v2</option>
        </param>

        <conditional name="use_images_conditional">
            <param name="use_images" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Use image modality?"/>
            <when value="yes">
                <repeat name="images_zip_repeat" title="Image archive(s)" min="1">
                    <param name="images_zip" type="data" format="zip" label="ZIP file containing images"/>
                </repeat>
                <param name="backbone_image" type="select" label="Image backbone" optional="true">
                    <option value="swin_base_patch4_window7_224" selected="true">swin_base_patch4_window7_224</option>
                    <option value="swin_large_patch4_window12_384.in22k_ft_in1k">swin_large_patch4_window12_384.in22k_ft_in1k</option>
                    <option value="swin_small_patch4_window7_224">swin_small_patch4_window7_224</option>
                    <option value="swin_tiny_patch4_window7_224">swin_tiny_patch4_window7_224</option>
                    <option value="caformer_b36.in21k_ft_in1k">caformer_b36.in21k_ft_in1k</option>
                    <option value="caformer_m36.in21k_ft_in1k">caformer_m36.in21k_ft_in1k</option>
                    <option value="caformer_s36.in21k_ft_in1k">caformer_s36.in21k_ft_in1k</option>
                    <option value="caformer_s18.in1k">caformer_s18.in1k</option>
                    <option value="caformer_b36.sail_in22k_ft_in1k">caformer_b36.sail_in22k_ft_in1k</option>
                    <option value="caformer_m36.sail_in22k_ft_in1k">caformer_m36.sail_in22k_ft_in1k</option>
                    <option value="caformer_s36.sail_in22k_ft_in1k">caformer_s36.sail_in22k_ft_in1k</option>
                    <option value="vit_base_patch16_224">vit_base_patch16_224</option>
                    <option value="vit_large_patch14_224">vit_large_patch14_224</option>
                    <option value="convnext_base">convnext_base</option>
                    <option value="eva02_base_patch14_448.mim_in22k_ft_in22k_in1k">eva02_base_patch14_448.mim_in22k_ft_in22k_in1k</option>
                    <option value="resnet50">resnet50</option>
                </param>
                <param name="missing_image_strategy" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Drop rows with missing images?" help="True = drop, False = replace with placeholder (default)"/>
            </when>
            <when value="no"/>
        </conditional>

        <param name="preset" type="select" label="Quality preset">
            <option value="medium_quality" selected="true">Medium quality (fast)</option>
            <option value="high_quality">High quality</option>
            <option value="best_quality">Best quality (slowest)</option>
        </param>

        <param name="eval_metric" type="select" label="Primary evaluation metric">
            <option value="auto" selected="true">Auto (let AutoGluon choose)</option>
            <option value="roc_auc">ROC AUC</option>
            <option value="accuracy">Accuracy</option>
            <option value="balanced_accuracy">Balanced Accuracy</option>
            <option value="f1">F1</option>
            <option value="f1_macro">F1 Macro</option>
            <option value="f1_micro">F1 Micro</option>
            <option value="f1_weighted">F1 Weighted</option>
            <option value="precision">Precision</option>
            <option value="precision_macro">Precision Macro</option>
            <option value="precision_micro">Precision Micro</option>
            <option value="precision_weighted">Precision Weighted</option>
            <option value="recall">Recall</option>
            <option value="recall_macro">Recall Macro</option>
            <option value="recall_micro">Recall Micro</option>
            <option value="recall_weighted">Recall Weighted</option>
            <option value="average_precision">Average Precision</option>
            <option value="roc_auc_ovo_macro">ROC AUC OVO Macro</option>
            <option value="roc_auc_ovo_weighted">ROC AUC OVO Weighted</option>
            <option value="roc_auc_ovr_macro">ROC AUC OVR Macro</option>
            <option value="roc_auc_ovr_weighted">ROC AUC OVR Weighted</option>
            <option value="log_loss">Log Loss</option>
            <option value="mse">MSE</option>
            <option value="rmse">RMSE</option>
            <option value="mae">MAE</option>
            <option value="msle">MSLE</option>
            <option value="r2">R2</option>
        </param>

        <param name="random_seed" type="integer" value="42" label="Random seed"/>
        <param name="time_limit" type="integer" optional="true" min="60" label="Time limit (seconds)" help="Total training time budget. Recommended: 3600+ for real runs"/>
        <param name="deterministic" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Enable deterministic mode" help="Use deterministic algorithms and CuDNN settings to reduce run-to-run variance (may slow training)"/>

        <conditional name="customize_defaults_conditional">
            <param name="customize_defaults" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Advanced: customize training settings?"/>
            <when value="yes">
                <param name="validation_size" type="float" value="0.2" label="Validation fraction (when test set provided)"/>
                <param name="split_probabilities" type="text" value="0.7 0.1 0.2" label="Train / Val / Test split (space-separated) when no test set"/>
                <param name="cross_validation" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Enable k-fold cross-validation"/>
                <param name="num_folds" type="integer" value="5" label="Number of CV folds"/>
                <param name="epochs" type="integer" optional="true" label="Max epochs"/>
                <param name="learning_rate" type="float" optional="true" label="Learning rate"/>
                <param name="batch_size" type="integer" optional="true" label="Batch size"/>
                <param name="threshold" type="float" optional="true" min="0" max="1" label="Binary classification threshold"/>
                <param name="hyperparameters" type="text" optional="true" label="Extra AutoGluon hyperparameters (JSON or YAML string)"/>
            </when>
            <when value="no"/>
        </conditional>
    </inputs>
    <outputs>
        <data name="output_html" format="html" label="Multimodal Learner analysis report on data ${input_csv.name}"/>
        <data name="output_config" format="yaml" label="Multimodal Learner training config on data ${input_csv.name}"/>
        <data name="output_json" format="json" label="Multimodal Learner metric results on data ${input_csv.name}"/>
    </outputs>
    <tests>
        <!-- Basic run with images + external test set -->
        <test expect_num_outputs="3">
            <param name="input_csv" value="train.csv"/>
            <param name="target_column" value="7"/>
            <param name="test_dataset_conditional|has_test_dataset" value="yes"/>
            <param name="test_dataset_conditional|input_test" value="test.csv"/>
            <param name="use_images_conditional|use_images" value="yes"/>
            <param name="use_images_conditional|images_zip_repeat_0|images_zip" value="images.zip"/>
            <param name="use_images_conditional|backbone_image" value="resnet50"/>
            <param name="backbone_text" value="google/electra-base-discriminator"/>
            <output name="output_html">
                <assert_contents>
                    <has_text text="Model Performance Summary"/>
                </assert_contents>
            </output>
        </test>
        <!-- Custom threshold -->
        <test expect_num_outputs="3">
            <param name="input_csv" value="train.csv"/>
            <param name="target_column" value="7"/>
            <param name="test_dataset_conditional|has_test_dataset" value="yes"/>
            <param name="test_dataset_conditional|input_test" value="test.csv"/>
            <param name="use_images_conditional|use_images" value="yes"/>
            <param name="use_images_conditional|images_zip_repeat_0|images_zip" value="images.zip"/>
            <param name="customize_defaults_conditional|customize_defaults" value="yes"/>
            <param name="customize_defaults_conditional|threshold" value="0.4"/>
            <output name="output_json">
                <assert_contents>
                    <!-- Literal double quotes inside an attribute value must be written as entities. -->
                    <has_text text="&quot;threshold&quot;: 0.4"/>
                </assert_contents>
            </output>
        </test>
        <!-- No external test set; internal split -->
        <test expect_num_outputs="3">
            <param name="input_csv" value="train.csv"/>
            <param name="target_column" value="7"/>
            <param name="test_dataset_conditional|has_test_dataset" value="no"/>
            <param name="use_images_conditional|use_images" value="yes"/>
            <param name="use_images_conditional|images_zip_repeat_0|images_zip" value="images.zip"/>
            <output name="output_json">
                <assert_contents>
                    <has_text text="&quot;val&quot;"/>
                </assert_contents>
            </output>
        </test>
        <!-- Text/tabular only (ignore images) -->
        <test expect_num_outputs="3">
            <param name="input_csv" value="train.csv"/>
            <param name="target_column" value="7"/>
            <param name="test_dataset_conditional|has_test_dataset" value="yes"/>
            <param name="test_dataset_conditional|input_test" value="test.csv"/>
            <param name="use_images_conditional|use_images" value="no"/>
            <output name="output_html">
                <assert_contents>
                    <has_text text="Train and Validation Performance Summary"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**AutoGluon Multimodal Learner**

Trains a powerful multimodal model combining tabular features, images, and text using AutoGluon-Multimodal.

- Handles missing images intelligently
- Supports cross-validation
- Produces detailed HTML reports and transparent metrics
- Fully reproducible

Ideal for medical imaging + clinical data, product images + descriptions, etc.
    ]]></help>
    <citations>
        <citation type="bibtex">
@article{AutoGluon2020,
    author = {Erickson, Nick and Mueller, Jonas and Shirkov, Alexander and Zhang, Hang and Larroy, Pedro and Li, Mu and Smola, Alexander},
    title = {AutoGluon-Tabular: Robust and Accurate AutoML for Structured Data},
    journal = {arXiv preprint arXiv:2003.06505},
    year = {2020}
}
        </citation>
    </citations>
</tool>
