comparison multimodal_learner.xml @ 0:375c36923da1 draft default tip

planemo upload for repository https://github.com/goeckslab/gleam.git commit 1c6c1ad7a1b2bd3645aa0eafa2167784820b52e0
author goeckslab
date Tue, 09 Dec 2025 23:49:47 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:375c36923da1
1 <tool id="multimodal_learner" name="Multimodal Learner" version="0.1.0" profile="22.01">
2 <description>Train and evaluate an AutoGluon Multimodal model (tabular + image + text)</description>
3
4 <requirements> <!-- Runtime environment: a pinned Docker image. -->
5 <container type="docker">quay.io/goeckslab/multimodal-learner:1.4.0</container>
6 </requirements>
7
8 <required_files> <!-- Python sources that must accompany this wrapper. -->
9 <include path="multimodal_learner.py"/> <!-- script invoked by the command template -->
10 <include path="utils.py"/>
11 <include path="split_logic.py"/>
12 <include path="training_pipeline.py"/>
13 <include path="test_pipeline.py"/>
14 <include path="metrics_logic.py"/>
15 <include path="plot_logic.py"/>
16 <include path="report_utils.py"/>
17 <include path="feature_help_modal.py"/>
18 </required_files>
19
20 <stdio> <!-- Maps process exit codes to job error levels. -->
21 <exit_code range="137" level="fatal_oom" description="Out of Memory"/> <!-- exit code 137 is reported as an out-of-memory failure -->
22 <exit_code range="1:" level="fatal" description="Tool failed — see Tool Standard Error"/> <!-- open-ended range: every exit code from 1 upwards is fatal -->
23 </stdio>
24
25 <command detect_errors="exit_code"><![CDATA[
26 ## Cheetah template that builds the shell command; lines starting with a double hash are template comments and never reach the shell.
27 ## Collect every uploaded image archive and pre-quote the paths for the shell.
28 #set $image_zip_paths = []
29 #if $use_images_conditional.use_images == "yes"
30 #for $zip_file in $use_images_conditional.images_zip_repeat
31 #set $image_zip_paths = $image_zip_paths + [$zip_file.images_zip]
32 #end for
33 #end if
34 #if len($image_zip_paths) > 0
35 #set $images_zip_cli = " ".join(["'%s'" % z for z in $image_zip_paths])
36 #else
37 #set $images_zip_cli = None
38 #end if
39 ## Link the datasets to fixed file names. The test dataset parameter is optional, so it is linked and passed only when a dataset was actually selected.
40 set -e;
41 ln -sf '$input_csv' 'train_input.csv';
42 #if $test_dataset_conditional.has_test_dataset == "yes" and $test_dataset_conditional.input_test
43 ln -sf '$test_dataset_conditional.input_test' 'test_input.csv';
44 #end if
45
46 python '$__tool_directory__/multimodal_learner.py'
47 --input_csv_train 'train_input.csv'
48 #if $test_dataset_conditional.has_test_dataset == "yes" and $test_dataset_conditional.input_test
49 --input_csv_test 'test_input.csv'
50 #end if
51 --target_column '$target_column'
52 ## Image options are emitted only when the image modality is enabled.
53 #if $use_images_conditional.use_images == "yes"
54 #if $images_zip_cli
55 --images_zip $images_zip_cli
56 #end if
57 --missing_image_strategy '$use_images_conditional.missing_image_strategy'
58 #if $use_images_conditional.backbone_image
59 --backbone_image '$use_images_conditional.backbone_image'
60 #end if
61 #end if
62 ## Truthiness test, same as for the image backbone: an emptied optional select must not be forwarded as the literal text None.
63 #if $backbone_text
64 --backbone_text '$backbone_text'
65 #end if
66
67 --preset '$preset'
68 --eval_metric '$eval_metric'
69
70 --random_seed '$random_seed'
71 #if $time_limit
72 --time_limit $time_limit
73 #end if
74 #if $deterministic == "true"
75 --deterministic
76 #end if
77 ## Advanced overrides: unset optional values are skipped. The threshold is compared as text so that an explicit 0 is still forwarded.
78 #if $customize_defaults_conditional.customize_defaults == "yes"
79 #if $customize_defaults_conditional.validation_size not in ("", None)
80 --validation_size $customize_defaults_conditional.validation_size
81 #end if
82 #if $customize_defaults_conditional.split_probabilities and str($customize_defaults_conditional.split_probabilities).strip()
83 --split_probabilities #echo " ".join([str(float(x)) for x in str($customize_defaults_conditional.split_probabilities).replace(",", " ").split() if x.strip()]) #
84 #end if
85 #if $customize_defaults_conditional.cross_validation == "true"
86 --cross_validation true
87 --num_folds $customize_defaults_conditional.num_folds
88 #end if
89 #if $customize_defaults_conditional.epochs
90 --epochs $customize_defaults_conditional.epochs
91 #end if
92 #if $customize_defaults_conditional.learning_rate
93 --learning_rate $customize_defaults_conditional.learning_rate
94 #end if
95 #if $customize_defaults_conditional.batch_size
96 --batch_size $customize_defaults_conditional.batch_size
97 #end if
98 #if str($customize_defaults_conditional.threshold) != ""
99 --threshold $customize_defaults_conditional.threshold
100 #end if
101 #if $customize_defaults_conditional.hyperparameters
102 --hyperparameters '$customize_defaults_conditional.hyperparameters'
103 #end if
104 #end if
105 ## Destination paths for the three output datasets.
106 --output_json '$output_json'
107 --output_html '$output_html'
108 --output_config '$output_config'
109 ]]></command>
110
111 <inputs>
112 <param name="input_csv" type="data" format="csv,tsv" label="Training dataset (CSV/TSV)" help="Must contain the target column and optional image paths"/> <!-- training table; the command links it as train_input.csv -->
113 <param name="target_column" type="data_column" data_ref="input_csv" numerical="false" use_header_names="true" label="Target / Label column"/> <!-- column picker bound to input_csv via data_ref -->
114
115 <conditional name="test_dataset_conditional"> <!-- Lets the user supply a separate test dataset. -->
116 <param name="has_test_dataset" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Provide separate test dataset?"/>
117 <when value="yes">
118 <param name="input_test" type="data" format="csv,tsv" optional="true" label="Test dataset (CSV/TSV)"/> <!-- optional: may stay empty even when the checkbox above is ticked -->
119 </when>
120 <when value="no"/>
121 </conditional>
122
123 <param name="backbone_text" type="select" label="Text backbone" optional="true"> <!-- optional select; the first option is preselected -->
124 <option value="microsoft/deberta-v3-base" selected="true">microsoft/deberta-v3-base</option>
125 <option value="microsoft/deberta-v3-small">microsoft/deberta-v3-small</option>
126 <option value="google/electra-base-discriminator">google/electra-base-discriminator</option>
127 <option value="google/electra-small-discriminator">google/electra-small-discriminator</option>
128 <option value="roberta-base">roberta-base</option>
129 <option value="bert-base-uncased">bert-base-uncased</option>
130 <option value="distilroberta-base">distilroberta-base</option>
131 <option value="albert-base-v2">albert-base-v2</option>
132 </param>
133
134 <conditional name="use_images_conditional"> <!-- Image modality: archives, backbone and missing-image handling. -->
135 <param name="use_images" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Use image modality?"/>
136 <when value="yes">
137 <repeat name="images_zip_repeat" title="Image archive(s)" min="1"> <!-- at least one archive is required -->
138 <param name="images_zip" type="data" format="zip" label="ZIP file containing images"/>
139 </repeat>
140 <param name="backbone_image" type="select" label="Image backbone" optional="true">
141 <option value="swin_base_patch4_window7_224" selected="true">swin_base_patch4_window7_224</option>
142 <option value="swin_large_patch4_window12_384.in22k_ft_in1k">swin_large_patch4_window12_384.in22k_ft_in1k</option>
143 <option value="swin_small_patch4_window7_224">swin_small_patch4_window7_224</option>
144 <option value="swin_tiny_patch4_window7_224">swin_tiny_patch4_window7_224</option>
145 <option value="caformer_b36.in21k_ft_in1k">caformer_b36.in21k_ft_in1k</option>
146 <option value="caformer_m36.in21k_ft_in1k">caformer_m36.in21k_ft_in1k</option>
147 <option value="caformer_s36.in21k_ft_in1k">caformer_s36.in21k_ft_in1k</option>
148 <option value="caformer_s18.in1k">caformer_s18.in1k</option>
149 <option value="caformer_b36.sail_in22k_ft_in1k">caformer_b36.sail_in22k_ft_in1k</option>
150 <option value="caformer_m36.sail_in22k_ft_in1k">caformer_m36.sail_in22k_ft_in1k</option>
151 <option value="caformer_s36.sail_in22k_ft_in1k">caformer_s36.sail_in22k_ft_in1k</option>
152 <option value="vit_base_patch16_224">vit_base_patch16_224</option>
153 <option value="vit_large_patch14_224">vit_large_patch14_224</option>
154 <option value="convnext_base">convnext_base</option>
155 <option value="eva02_base_patch14_448.mim_in22k_ft_in22k_in1k">eva02_base_patch14_448.mim_in22k_ft_in22k_in1k</option>
156 <option value="resnet50">resnet50</option>
157 </param>
158 <param name="missing_image_strategy" type="boolean" truevalue="true" falsevalue="false" checked="false"
159 label="Drop rows with missing images?" help="True = drop, False = replace with placeholder (default)"/>
160 </when>
161 <when value="no"/>
162 </conditional>
163
164 <param name="preset" type="select" label="Quality preset"> <!-- forwarded to the script as the preset option -->
165 <option value="medium_quality" selected="true">Medium quality (fast)</option>
166 <option value="high_quality">High quality</option>
167 <option value="best_quality">Best quality (slowest)</option>
168 </param>
169
170 <param name="eval_metric" type="select" label="Primary evaluation metric"> <!-- forwarded to the script as the eval_metric option; "auto" is the default -->
171 <option value="auto" selected="true">Auto (let AutoGluon choose)</option>
172 <option value="roc_auc">ROC AUC</option>
173 <option value="accuracy">Accuracy</option>
174 <option value="balanced_accuracy">Balanced Accuracy</option>
175 <option value="f1">F1</option>
176 <option value="f1_macro">F1 Macro</option>
177 <option value="f1_micro">F1 Micro</option>
178 <option value="f1_weighted">F1 Weighted</option>
179 <option value="precision">Precision</option>
180 <option value="precision_macro">Precision Macro</option>
181 <option value="precision_micro">Precision Micro</option>
182 <option value="precision_weighted">Precision Weighted</option>
183 <option value="recall">Recall</option>
184 <option value="recall_macro">Recall Macro</option>
185 <option value="recall_micro">Recall Micro</option>
186 <option value="recall_weighted">Recall Weighted</option>
187 <option value="average_precision">Average Precision</option>
188 <option value="roc_auc_ovo_macro">ROC AUC OVO Macro</option>
189 <option value="roc_auc_ovo_weighted">ROC AUC OVO Weighted</option>
190 <option value="roc_auc_ovr_macro">ROC AUC OVR Macro</option>
191 <option value="roc_auc_ovr_weighted">ROC AUC OVR Weighted</option>
192 <option value="log_loss">Log Loss</option>
193 <option value="mse">MSE</option>
194 <option value="rmse">RMSE</option>
195 <option value="mae">MAE</option>
196 <option value="msle">MSLE</option>
197 <option value="r2">R2</option>
198 </param>
199
200 <param name="random_seed" type="integer" value="42" label="Random seed"/> <!-- always passed to the script -->
201
202 <param name="time_limit" type="integer" optional="true" min="60" label="Time limit (seconds)" help="Total training time budget. Recommended: 3600+ for real runs"/> <!-- optional; omitted from the command when left empty -->
203 <param name="deterministic" type="boolean" truevalue="true" falsevalue="false" checked="false"
204 label="Enable deterministic mode" help="Use deterministic algorithms and CuDNN settings to reduce run-to-run variance (may slow training)"/>
205
206 <conditional name="customize_defaults_conditional"> <!-- Advanced training overrides; forwarded to the script only when enabled. -->
207 <param name="customize_defaults" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Advanced: customize training settings?"/>
208 <when value="yes">
209 <param name="validation_size" type="float" value="0.2" min="0" max="1" label="Validation fraction (when test set provided)"/> <!-- a fraction, so bounded to the range 0 to 1 like the threshold below -->
210 <param name="split_probabilities" type="text" value="0.7 0.1 0.2" label="Train / Val / Test split (space-separated) when no test set"/> <!-- the command also accepts commas as separators -->
211 <param name="cross_validation" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Enable k-fold cross-validation"/>
212 <param name="num_folds" type="integer" value="5" min="2" label="Number of CV folds"/> <!-- k-fold needs at least two folds; only forwarded when cross-validation is enabled -->
213 <param name="epochs" type="integer" optional="true" label="Max epochs"/>
214 <param name="learning_rate" type="float" optional="true" label="Learning rate"/>
215 <param name="batch_size" type="integer" optional="true" label="Batch size"/>
216 <param name="threshold" type="float" optional="true" min="0" max="1" label="Binary classification threshold"/>
217 <param name="hyperparameters" type="text" optional="true" label="Extra AutoGluon hyperparameters (JSON or YAML string)"/> <!-- NOTE(review): no sanitizer is declared, so Galaxy's default text sanitization presumably rewrites quotes and braces; confirm the script decodes them -->
218 </when>
219 <when value="no"/>
220 </conditional>
221 </inputs>
222
223 <outputs> <!-- Three datasets, each labelled with the name of the training dataset. -->
224 <data name="output_html" format="html" label="Multimodal Learner analysis report on data ${input_csv.name}"/> <!-- HTML report -->
225 <data name="output_config" format="yaml" label="Multimodal Learner training config on data ${input_csv.name}"/> <!-- training configuration as YAML -->
226 <data name="output_json" format="json" label="Multimodal Learner metric results on data ${input_csv.name}"/> <!-- metric results as JSON -->
227 </outputs>
228
229 <tests>
230 <!-- Basic run: image modality on, external test set, explicit image and text backbones; the HTML report must contain the summary heading -->
231 <test expect_num_outputs="3">
232 <param name="input_csv" value="train.csv"/>
233 <param name="target_column" value="7"/>
234 <param name="test_dataset_conditional|has_test_dataset" value="yes"/>
235 <param name="test_dataset_conditional|input_test" value="test.csv"/>
236 <param name="use_images_conditional|use_images" value="yes"/>
237 <param name="use_images_conditional|images_zip_repeat_0|images_zip" value="images.zip"/>
238 <param name="use_images_conditional|backbone_image" value="resnet50"/>
239 <param name="backbone_text" value="google/electra-base-discriminator"/>
240 <output name="output_html">
241 <assert_contents>
242 <has_text text="Model Performance Summary"/>
243 </assert_contents>
244 </output>
245 </test>
246
247 <!-- Custom threshold: set through the advanced settings; the JSON output must echo the value 0.4 -->
248 <test expect_num_outputs="3">
249 <param name="input_csv" value="train.csv"/>
250 <param name="target_column" value="7"/>
251 <param name="test_dataset_conditional|has_test_dataset" value="yes"/>
252 <param name="test_dataset_conditional|input_test" value="test.csv"/>
253 <param name="use_images_conditional|use_images" value="yes"/>
254 <param name="use_images_conditional|images_zip_repeat_0|images_zip" value="images.zip"/>
255 <param name="customize_defaults_conditional|customize_defaults" value="yes"/>
256 <param name="customize_defaults_conditional|threshold" value="0.4"/>
257 <output name="output_json">
258 <assert_contents>
259 <has_text text="&quot;threshold&quot;: 0.4"/>
260 </assert_contents>
261 </output>
262 </test>
263
264 <!-- No external test set: the tool splits the training data itself; the JSON output must contain a "val" entry -->
265 <test expect_num_outputs="3">
266 <param name="input_csv" value="train.csv"/>
267 <param name="target_column" value="7"/>
268 <param name="test_dataset_conditional|has_test_dataset" value="no"/>
269 <param name="use_images_conditional|use_images" value="yes"/>
270 <param name="use_images_conditional|images_zip_repeat_0|images_zip" value="images.zip"/>
271 <output name="output_json">
272 <assert_contents>
273 <has_text text="&quot;val&quot;"/>
274 </assert_contents>
275 </output>
276 </test>
277
278 <!-- Text/tabular only: image modality switched off, external test set supplied; checks a heading in the HTML report -->
279 <test expect_num_outputs="3">
280 <param name="input_csv" value="train.csv"/>
281 <param name="target_column" value="7"/>
282 <param name="test_dataset_conditional|has_test_dataset" value="yes"/>
283 <param name="test_dataset_conditional|input_test" value="test.csv"/>
284 <param name="use_images_conditional|use_images" value="no"/>
285 <output name="output_html">
286 <assert_contents>
287 <has_text text="Train and Validation Performance Summary"/>
288 </assert_contents>
289 </output>
290 </test>
291 </tests>
292
293 <help><![CDATA[
294 **AutoGluon Multimodal Learner**
295
296 Trains a powerful multimodal model combining tabular features, images, and text using AutoGluon-Multimodal.
297
298 - Handles missing images by either dropping the affected rows or substituting a placeholder image
299 - Supports cross-validation
300 - Produces detailed HTML reports and transparent metrics
301 - Reproducible via a fixed random seed and an optional deterministic mode
302
303 Ideal for medical imaging + clinical data, product images + descriptions, etc.
304 ]]></help>
305
306 <citations>
307 <citation type="bibtex">
308 @article{AutoGluon2020,
309 author = {Erickson, Nick and Mueller, Jonas and Shirkov, Alexander and Zhang, Hang and Larroy, Pedro and Li, Mu and Smola, Alexander},
310 title = {AutoGluon-Tabular: Robust and Accurate AutoML for Structured Data},
311 journal = {arXiv preprint arXiv:2003.06505},
312 year = {2020}
313 }
314 </citation>
315 </citations>
316 </tool>