comparison flexynesis.xml @ 0:bd808d1c4e0c draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/flexynesis commit b6763da7273957b7362787b7fdc6af5572161adb
author bgruening
date Mon, 12 Aug 2024 17:58:14 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:bd808d1c4e0c
1 <tool id="flexynesis" name="Flexynesis" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>A deep-learning based multi-omics bulk sequencing data integration suite</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="edam"/>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 @CHECK_NON_COMMERCIAL_USE@
10 mkdir -p input/test input/train output &&
11 ln -s '$train_clin' input/train/clin.csv &&
12 ln -s '$test_clin' input/test/clin.csv &&
13 ## Stage the main omics matrix under its assay name (or "main" when no name is given); extra layers follow as omics_NAME. User-derived values are single-quoted for the shell.
14 #if str($assay_main) != '':
15 #set $name = str($assay_main.replace(" ", "_"))
16 ln -s '$train_omics_main' 'input/train/${name}.csv' &&
17 ln -s '$test_omics_main' 'input/test/${name}.csv' &&
18 #set $data_names = [$name]
19 #else
20 ln -s '$train_omics_main' input/train/main.csv &&
21 ln -s '$test_omics_main' input/test/main.csv &&
22 #set $data_names = ['main']
23 #end if
24 #if str($training_type.model) == 'cm_train':
25 #if str($layer_main) == 'input':
26 #set $input_layers = $data_names
27 #set $output_layers = []
28 #else
29 #set $input_layers = []
30 #set $output_layers = $data_names
31 #end if
32 #end if
33 #for $i, $element in enumerate($omics)
34 #if str($element.train_omics) != 'None' and str($element.test_omics) != 'None':
35 #if str($element.assay) != '':
36 #set $i = str($element.assay.replace(" ", "_"))
37 #end if
38 ln -s '${element.train_omics}' 'input/train/omics_${i}.csv' &&
39 ln -s '${element.test_omics}' 'input/test/omics_${i}.csv' &&
40 #silent $data_names.append("omics_" + str($i))
41 #if str($training_type.model) == 'cm_train':
42 #if str($element.layer) == 'input':
43 #silent $input_layers.append("omics_" + str($i))
44 #else
45 #silent $output_layers.append("omics_" + str($i))
46 #end if
47 #end if
48 #end if
49 #end for
50 flexynesis
51 --data_path 'input'
52 --outdir 'output'
53 --model_class $model_class
54 #if str($model_class) == 'GNN':
55 --gnn_conv_type $gnn_conv_type
56 --string_organism $string_organism
57 --string_node_name $string_node_name
58 #end if
59 #if str($training_type.model) == 's_train':
60 #if str($target_variables) != '':
61 --target_variables '$target_variables'
62 #end if
63 #if str($surv_event_var) != '':
64 --surv_event_var '$surv_event_var'
65 --surv_time_var '$surv_time_var'
66 #end if
67 #end if
68 #if str($training_type.model) == 'cm_train':
69 --input_layers '$str(",".join($input_layers))'
70 --output_layers '$str(",".join($output_layers))'
71 #end if
72 --fusion_type $fusion_type
73 --hpo_iter $hpo_iter
74 --finetuning_samples $finetuning_samples
75 --variance_threshold $variance_threshold
76 --correlation_threshold $correlation_threshold
77 --subsample $subsample
78 --features_min $features_min
79 --features_top_percentile $features_top_percentile
80 --data_types '$str(",".join($data_names))'
81 --early_stop_patience $early_stop_patience
82 --hpo_patience $hpo_patience
83 $log_transform
84 $use_loss_weighting
85 $use_cv
86 $evaluate_baseline_performance
87 $disable_marker_finding
88 \${GALAXY_FLEXYNESIS_EXTRA_ARGUMENTS}
89 ]]></command>
90 <inputs> <!-- One conditional (training_type) selects supervised, unsupervised or cross-modality training; each branch expands the shared main_inputs, extra_inputs and advanced macros. -->
91 <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
92 <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
93 </param>
94 <conditional name="training_type">
95 <param name="model" type="select" label="Type of Analysis" >
96 <option value="s_train">Supervised Training</option>
97 <option value="us_train">Unsupervised Training</option>
98 <option value="cm_train">Cross-modality Training</option>
99 </param>
100 <when value="s_train">
101 <expand macro="main_inputs"/>
102 <repeat name="omics" min="0" title="Multiple omics layers?">
103 <expand macro="extra_inputs"/>
104 </repeat>
105 <conditional name="model_class" label="Model class">
106 <param argument="--model_class" type="select" label="Model class" help="The kind of model class to instantiate">
107 <option value="DirectPred">DirectPred</option>
108 <option value="GNN">GNN</option>
109 <option value="MultiTripletNetwork">MultiTripletNetwork</option>
110 <option value="RandomForest">RandomForest</option>
111 <option value="SVM">SVM</option>
112 <option value="RandomSurvivalForest">RandomSurvivalForest</option>
113 </param>
114 <when value="DirectPred"/>
115 <when value="GNN">
116 <param argument="--gnn_conv_type" type="select" label="Which graph convolution type to use.">
117 <option value="GC">GC</option>
118 <option value="GCN">GCN</option>
119 <option value="SAGE">SAGE</option>
120 </param>
121 <param argument="--string_organism" type="select" label="STRING DB organism">
122 <option value="9606">Homo sapiens</option>
123 <option value="10090">Mus musculus</option>
124 <option value="10116">Rattus norvegicus</option>
125 <option value="9544">Macaca mulatta</option>
126 </param>
127 <param argument="--string_node_name" type="select" label="String node name" >
128 <option value="gene_name">Gene name</option>
129 <option value="gene_id">Gene id</option>
130 </param>
131 </when>
132 <when value="MultiTripletNetwork"/>
133 <when value="RandomForest"/>
134 <when value="SVM"/>
135 <when value="RandomSurvivalForest"/>
136 </conditional>
137 <param argument="--target_variables" type="text" label="Target variables" help="Which variables in 'clin.csv' to use for predictions, comma-separated if multiple.">
138 <sanitizer invalid_char=""> <!-- Single quotes are stripped because they could terminate a quoted shell argument. -->
139 <valid initial="string.printable"><remove value="&apos;"/></valid>
140 </sanitizer>
141 </param>
142 <param argument="--surv_event_var" type="text" label="Survival event" help="Which column in 'clin.csv' to use as event/status indicator for survival modeling.">
143 <sanitizer invalid_char=""> <!-- Single quotes are stripped because they could terminate a quoted shell argument. -->
144 <valid initial="string.printable"><remove value="&apos;"/></valid>
145 </sanitizer>
146 </param>
147 <param argument="--surv_time_var" type="text" label="Survival time" help="Which column in 'clin.csv' to use as time/duration indicator for survival modeling.">
148 <sanitizer invalid_char=""> <!-- Single quotes are stripped because they could terminate a quoted shell argument. -->
149 <valid initial="string.printable"><remove value="&apos;"/></valid>
150 </sanitizer>
151 </param>
152 <expand macro="advanced"/>
153 </when>
154 <when value="us_train">
155 <expand macro="main_inputs"/>
156 <repeat name="omics" min="0" title="Multiple omics layers?">
157 <expand macro="extra_inputs"/>
158 </repeat>
159 <param argument="--model_class" type="select" label="Model class" help="The kind of model class to instantiate">
160 <option value="supervised_vae">supervised_vae</option>
161 </param>
162 <expand macro="advanced"/>
163 </when>
164 <when value="cm_train">
165 <expand macro="main_inputs"/>
166 <param name="layer_main" type="select" label="Use this omics data as input or output layer?">
167 <option value="input">Input</option>
168 <option value="output">Output</option>
169 </param>
170 <repeat name="omics" min="0" title="Multiple omics layers?">
171 <expand macro="extra_inputs"/>
172 <param name="layer" type="select" label="Use this omics data as input or output layer?">
173 <option value="input">Input</option>
174 <option value="output">Output</option>
175 </param>
176 </repeat>
177 <param argument="--model_class" type="select" label="Model class" help="The kind of model class to instantiate">
178 <option value="CrossModalPred">CrossModalPred</option>
179 </param>
180 <expand macro="advanced"/>
181 </when>
182 </conditional>
183 </inputs>
184 <outputs> <!-- Every file in the "output" directory whose name ends in .csv becomes one element of the "results" list collection; the element name is the file name without the .csv suffix. -->
185 <collection name="results" type="list" label="${tool.name} on ${on_string}: results">
186 <discover_datasets pattern="(?P&lt;name&gt;.+)\.csv$" format="csv" directory="output"/>
187 </collection>
188 </outputs>
189 <tests>
190 <test> <!-- Supervised run (s_train, DirectPred) predicting Erlotinib from a main layer named "bar" plus one extra layer named "foo". -->
191 <param name="non_commercial_use" value="True"/>
192 <param name="train_clin" value="train/clin" ftype="csv"/>
193 <param name="test_clin" value="test/clin" ftype="csv"/>
194 <param name="train_omics_main" value="train/gex" ftype="csv"/>
195 <param name="test_omics_main" value="test/gex" ftype="csv"/>
196 <param name="assay_main" value="bar"/>
197 <repeat name="omics">
198 <param name="train_omics" value="train/cnv" ftype="csv"/>
199 <param name="test_omics" value="test/cnv" ftype="csv"/>
200 <param name="assay" value="foo"/>
201 </repeat>
202 <conditional name="training_type">
203 <param name="model" value="s_train"/>
204 <param name="model_class" value="DirectPred"/>
205 <param name="target_variables" value="Erlotinib"/>
206 </conditional>
207 <param name="hpo_iter" value="1"/>
208 <output_collection name="results" type="list">
209 <element name="job.embeddings_test">
210 <assert_contents>
211 <has_n_lines n="50"/>
212 </assert_contents>
213 </element>
214 <element name="job.embeddings_train">
215 <assert_contents>
216 <has_n_lines n="50"/>
217 </assert_contents>
218 </element>
219 <element name="job.feature_importance">
220 <assert_contents>
221 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
222 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
223 </assert_contents>
224 </element>
225 <element name="job.feature_logs.bar">
226 <assert_contents>
227 <has_n_lines n="25"/>
228 </assert_contents>
229 </element>
230 <element name="job.feature_logs.omics_foo">
231 <assert_contents>
232 <has_n_lines n="25"/>
233 </assert_contents>
234 </element>
235 <element name="job.predicted_labels">
236 <assert_contents>
237 <has_text_matching expression="source_dataset:A-704,Erlotinib,"/>
238 <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/>
239 </assert_contents>
240 </element>
241 <element name="job.stats">
242 <assert_contents>
243 <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/>
244 <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/>
245 <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/>
246 </assert_contents>
247 </element>
248 </output_collection>
249 </test>
250 <test> <!-- Supervised run (s_train, DirectPred) predicting Erlotinib from the main layer "bar" only; no extra omics layers are supplied. -->
251 <param name="non_commercial_use" value="True"/>
252 <param name="train_clin" value="train/clin" ftype="csv"/>
253 <param name="test_clin" value="test/clin" ftype="csv"/>
254 <param name="train_omics_main" value="train/gex" ftype="csv"/>
255 <param name="test_omics_main" value="test/gex" ftype="csv"/>
256 <param name="assay_main" value="bar"/>
257 <conditional name="training_type">
258 <param name="model" value="s_train"/>
259 <param name="model_class" value="DirectPred"/>
260 <param name="target_variables" value="Erlotinib"/>
261 </conditional>
262 <param name="hpo_iter" value="1"/>
263 <output_collection name="results" type="list">
264 <element name="job.embeddings_test">
265 <assert_contents>
266 <has_n_lines n="50"/>
267 </assert_contents>
268 </element>
269 <element name="job.embeddings_train">
270 <assert_contents>
271 <has_n_lines n="50"/>
272 </assert_contents>
273 </element>
274 <element name="job.feature_importance">
275 <assert_contents>
276 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
277 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
278 </assert_contents>
279 </element>
280 <element name="job.feature_logs.bar">
281 <assert_contents>
282 <has_n_lines n="25"/>
283 </assert_contents>
284 </element>
285 <element name="job.predicted_labels">
286 <assert_contents>
287 <has_text_matching expression="source_dataset:A-704,Erlotinib,"/>
288 <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/>
289 </assert_contents>
290 </element>
291 <element name="job.stats">
292 <assert_contents>
293 <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/>
294 <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/>
295 <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/>
296 </assert_contents>
297 </element>
298 </output_collection>
299 </test>
300 <test> <!-- Supervised run (s_train, DirectPred) predicting Irinotecan from a main layer named "bar" plus one extra layer named "foo"; the feature logs of both layers are checked. -->
301 <param name="non_commercial_use" value="True"/>
302 <param name="train_clin" value="train/clin" ftype="csv"/>
303 <param name="test_clin" value="test/clin" ftype="csv"/>
304 <param name="train_omics_main" value="train/gex" ftype="csv"/>
305 <param name="test_omics_main" value="test/gex" ftype="csv"/>
306 <param name="assay_main" value="bar"/>
307 <repeat name="omics">
308 <param name="train_omics" value="train/cnv" ftype="csv"/>
309 <param name="test_omics" value="test/cnv" ftype="csv"/>
310 <param name="assay" value="foo"/>
311 </repeat>
312 <conditional name="training_type">
313 <param name="model" value="s_train"/>
314 <param name="model_class" value="DirectPred"/>
315 <param name="target_variables" value="Irinotecan"/>
316 </conditional>
317 <param name="hpo_iter" value="1"/>
318 <output_collection name="results" type="list">
319 <element name="job.embeddings_test">
320 <assert_contents>
321 <has_n_lines n="50"/>
322 </assert_contents>
323 </element>
324 <element name="job.embeddings_train">
325 <assert_contents>
326 <has_n_lines n="50"/>
327 </assert_contents>
328 </element>
329 <element name="job.feature_importance">
330 <assert_contents>
331 <has_text_matching expression="Irinotecan,0,,bar,A2M,"/>
332 <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/>
333 </assert_contents>
334 </element>
335 <element name="job.feature_logs.bar">
336 <assert_contents>
337 <has_n_lines n="25"/>
338 </assert_contents>
339 </element>
340 <element name="job.feature_logs.omics_foo">
341 <assert_contents>
342 <has_n_lines n="25"/>
343 </assert_contents>
344 </element>
345 <element name="job.predicted_labels">
346 <assert_contents>
347 <has_text_matching expression="source_dataset:A-704,Irinotecan,"/>
348 <has_text_matching expression="target_dataset:KMRC-20,Irinotecan,"/>
349 </assert_contents>
350 </element>
351 <element name="job.stats">
352 <assert_contents>
353 <has_text_matching expression="DirectPred,Irinotecan,numerical,mse,"/>
354 <has_text_matching expression="DirectPred,Irinotecan,numerical,r2,"/>
355 <has_text_matching expression="DirectPred,Irinotecan,numerical,pearson_corr,"/>
356 </assert_contents>
357 </element>
358 </output_collection>
359 </test>
360 <test> <!-- Unsupervised run (us_train, supervised_vae) on a main layer named "bar" plus one extra layer named "foo"; only embeddings and feature logs are checked. -->
361 <param name="non_commercial_use" value="True"/>
362 <param name="train_clin" value="train/clin" ftype="csv"/>
363 <param name="test_clin" value="test/clin" ftype="csv"/>
364 <param name="train_omics_main" value="train/gex" ftype="csv"/>
365 <param name="test_omics_main" value="test/gex" ftype="csv"/>
366 <param name="assay_main" value="bar"/>
367 <repeat name="omics">
368 <param name="train_omics" value="train/cnv" ftype="csv"/>
369 <param name="test_omics" value="test/cnv" ftype="csv"/>
370 <param name="assay" value="foo"/>
371 </repeat>
372 <conditional name="training_type">
373 <param name="model" value="us_train"/>
374 <param name="model_class" value="supervised_vae"/>
375 </conditional>
376 <param name="hpo_iter" value="1"/>
377 <output_collection name="results" type="list">
378 <element name="job.embeddings_test">
379 <assert_contents>
380 <has_n_lines n="50"/>
381 </assert_contents>
382 </element>
383 <element name="job.embeddings_train">
384 <assert_contents>
385 <has_n_lines n="50"/>
386 </assert_contents>
387 </element>
388 <element name="job.feature_logs.bar">
389 <assert_contents>
390 <has_n_lines n="25"/>
391 </assert_contents>
392 </element>
393 <element name="job.feature_logs.omics_foo">
394 <assert_contents>
395 <has_n_lines n="25"/>
396 </assert_contents>
397 </element>
398 </output_collection>
399 </test>
400 <test> <!-- Cross-modality run (cm_train, CrossModalPred) with the main layer "bar" as input and the extra layer "foo" as output; also checks the decoded train/test files for omics_foo. -->
401 <param name="non_commercial_use" value="True"/>
402 <param name="train_clin" value="train/clin" ftype="csv"/>
403 <param name="test_clin" value="test/clin" ftype="csv"/>
404 <param name="train_omics_main" value="train/gex" ftype="csv"/>
405 <param name="test_omics_main" value="test/gex" ftype="csv"/>
406 <param name="assay_main" value="bar"/>
407 <param name="layer_main" value="input"/>
408 <repeat name="omics">
409 <param name="train_omics" value="train/cnv" ftype="csv"/>
410 <param name="test_omics" value="test/cnv" ftype="csv"/>
411 <param name="assay" value="foo"/>
412 <param name="layer" value="output"/>
413 </repeat>
414 <conditional name="training_type">
415 <param name="model" value="cm_train"/>
416 <param name="model_class" value="CrossModalPred"/>
417 </conditional>
418 <param name="hpo_iter" value="1"/>
419 <output_collection name="results" type="list">
420 <element name="job.embeddings_test">
421 <assert_contents>
422 <has_n_lines n="50"/>
423 </assert_contents>
424 </element>
425 <element name="job.embeddings_train">
426 <assert_contents>
427 <has_n_lines n="50"/>
428 </assert_contents>
429 </element>
430 <element name="job.feature_logs.bar">
431 <assert_contents>
432 <has_n_lines n="25"/>
433 </assert_contents>
434 </element>
435 <element name="job.feature_logs.omics_foo">
436 <assert_contents>
437 <has_n_lines n="25"/>
438 </assert_contents>
439 </element>
440 <element name="job.test_decoded.omics_foo">
441 <assert_contents>
442 <has_n_lines n="23"/>
443 </assert_contents>
444 </element>
445 <element name="job.train_decoded.omics_foo">
446 <assert_contents>
447 <has_n_lines n="23"/>
448 </assert_contents>
449 </element>
450 </output_collection>
451 </test>
452 <test> <!-- Supervised run (s_train) requesting the GNN model class (GC convolution, STRING organism 9606, gene_name nodes) on layers "bar" and "foo". NOTE(review): the job.stats assertions below expect "DirectPred" although model_class is set to GNN; confirm that the nested model_class conditional is actually being set by this test. -->
453 <param name="non_commercial_use" value="True"/>
454 <param name="train_clin" value="train/clin" ftype="csv"/>
455 <param name="test_clin" value="test/clin" ftype="csv"/>
456 <param name="train_omics_main" value="train/gex" ftype="csv"/>
457 <param name="test_omics_main" value="test/gex" ftype="csv"/>
458 <param name="assay_main" value="bar"/>
459 <repeat name="omics">
460 <param name="train_omics" value="train/cnv" ftype="csv"/>
461 <param name="test_omics" value="test/cnv" ftype="csv"/>
462 <param name="assay" value="foo"/>
463 </repeat>
464 <conditional name="training_type">
465 <param name="model" value="s_train"/>
466 <param name="model_class" value="GNN"/>
467 <param name="gnn_conv_type" value="GC"/>
468 <param name="string_organism" value="9606"/>
469 <param name="string_node_name" value="gene_name"/>
470 <param name="target_variables" value="Erlotinib"/>
471 </conditional>
472 <param name="hpo_iter" value="1"/>
473 <output_collection name="results" type="list">
474 <element name="job.embeddings_test">
475 <assert_contents>
476 <has_n_lines n="50"/>
477 </assert_contents>
478 </element>
479 <element name="job.embeddings_train">
480 <assert_contents>
481 <has_n_lines n="50"/>
482 </assert_contents>
483 </element>
484 <element name="job.feature_importance">
485 <assert_contents>
486 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
487 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
488 </assert_contents>
489 </element>
490 <element name="job.feature_logs.bar">
491 <assert_contents>
492 <has_n_lines n="25"/>
493 </assert_contents>
494 </element>
495 <element name="job.feature_logs.omics_foo">
496 <assert_contents>
497 <has_n_lines n="25"/>
498 </assert_contents>
499 </element>
500 <element name="job.predicted_labels">
501 <assert_contents>
502 <has_text_matching expression="source_dataset:A-704,Erlotinib,"/>
503 <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/>
504 </assert_contents>
505 </element>
506 <element name="job.stats">
507 <assert_contents>
508 <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/>
509 <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/>
510 <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/>
511 </assert_contents>
512 </element>
513 </output_collection>
514 </test>
515 <test> <!-- Unsupervised run whose assay names contain spaces ("b ar", "f oo"); the expected element names use underscores instead (b_ar, omics_f_oo). -->
516 <param name="non_commercial_use" value="True"/>
517 <param name="train_clin" value="train/clin" ftype="csv"/>
518 <param name="test_clin" value="test/clin" ftype="csv"/>
519 <param name="train_omics_main" value="train/gex" ftype="csv"/>
520 <param name="test_omics_main" value="test/gex" ftype="csv"/>
521 <param name="assay_main" value="b ar"/>
522 <repeat name="omics">
523 <param name="train_omics" value="train/cnv" ftype="csv"/>
524 <param name="test_omics" value="test/cnv" ftype="csv"/>
525 <param name="assay" value="f oo"/>
526 </repeat>
527 <conditional name="training_type">
528 <param name="model" value="us_train"/>
529 <param name="model_class" value="supervised_vae"/>
530 </conditional>
531 <param name="hpo_iter" value="1"/>
532 <output_collection name="results" type="list">
533 <element name="job.embeddings_test">
534 <assert_contents>
535 <has_n_lines n="50"/>
536 </assert_contents>
537 </element>
538 <element name="job.embeddings_train">
539 <assert_contents>
540 <has_n_lines n="50"/>
541 </assert_contents>
542 </element>
543 <element name="job.feature_logs.b_ar">
544 <assert_contents>
545 <has_n_lines n="25"/>
546 </assert_contents>
547 </element>
548 <element name="job.feature_logs.omics_f_oo">
549 <assert_contents>
550 <has_n_lines n="25"/>
551 </assert_contents>
552 </element>
553 </output_collection>
554 </test>
555 </tests>
556 <help>
557 .. class:: warningmark
558
559 **WARNING: This tool is only available for NON-COMMERCIAL use. Permission is only granted for academic, research, and educational purposes. Before using, be sure to review, agree, and comply with the license.**
560
561 Flexynesis is a deep-learning based multi-omics bulk sequencing data integration suite with a focus on (pre-)clinical endpoint prediction.
562 The package includes multiple types of deep learning architectures such as simple fully connected networks, supervised variational autoencoders, graph convolutional networks, and multi-triplet networks, offers different options of data layer fusion, and automates feature selection and hyperparameter optimisation.
563
564 For more information, please check the Documentation_.
565
566 For commercial use, please review the license_ and contact the `copyright holders`_ .
567
568 -----
569
570 .. image:: https://raw.githubusercontent.com/BIMSBbioinfo/flexynesis/c4634d97f84e51f569dcfdab2caf42c9be453ef6/img/graphical_abstract.jpg
571 :width: 600
572
573 -----
574
575 **Input Files**
576
577 **clin.csv**
578
579 clin.csv contains the sample metadata. The first column contains unique sample identifiers. The other columns contain sample-associated clinical variables. NA values are allowed in the clinical variables.
580
581 The format might look like so:
582
583 ======== === === ===
584 , v1 v2 ...
585 -------- --- --- ---
586 sample1 a b ...
587 -------- --- --- ---
588 sample2 c d ...
589 -------- --- --- ---
590 sample3 e f ...
591 -------- --- --- ---
592 ... ... ... ...
593 ======== === === ===
594
595 .
596
597 **omics.csv**
598
599 The first column of the feature tables must be unique feature identifiers (e.g. gene names). The column names must be sample identifiers that should overlap with those in the clin.csv. They don't have to be completely identical or in the same order. Samples from the clin.csv that are not represented in the omics table will be dropped.
600
601 The format might look like so:
602
603 ===== ======= ======= ======= =======
604 , sample1 sample2 sample3 ...
605 ----- ------- ------- ------- -------
606 gene1 0 1 2 ...
607 ----- ------- ------- ------- -------
608 gene2 3 3 5 ...
609 ----- ------- ------- ------- -------
610 gene3 2 3 4 ...
611 ----- ------- ------- ------- -------
612 ... ... ... ... ...
613 ===== ======= ======= ======= =======
614
615 .
616
617 .. class:: infomark
618
619 **Concordance between train/test splits:**
620
621 The corresponding omics files in train/test splits must contain overlapping feature names (they don't have to be identical or in the same order). The clin.csv files in train/test must contain matching clinical variables.
622
623 -----
624
625 **Supervised Training**
626
627 **Minimum requirements**
628
629 * clin.csv and omics.csv files for training and testing
630 * Selection of a tool/model
631 * One target variable which can be numerical or categorical for regression/classification tasks.
632
633 Flexynesis supports both single-task and multi-task training. We can provide one or more target variables and optionally survival variables as input and Flexynesis will build the appropriate model architecture. If the selected variable is numerical, a Multi-Layered-Perceptron (MLP) with MSE loss will be used. If a categorical variable is provided, an MLP with cross-entropy-loss will be utilized. If survival variables are provided, an MLP with Cox-Proportional-Hazards loss will be attached to the model.
634
635 **Regression:**
636
637
638 If your target variable is numerical, Flexynesis will build a regression model.
639
640 **Classification:**
641
642
643 If your target variable is categorical, Flexynesis will build a classification model.
644
645 **Survival Analysis:**
646
647
648 If your target variable is survival data, Flexynesis will build a survival analysis model.
649 For survival analysis, two separate variables are required. The first is a numeric event variable (consisting of 0's and 1's, where 1 means that an event such as disease progression or death has occurred). The second is a numeric time variable, which indicates how much time elapsed until the event or the last patient follow-up.
650
651 .. class:: infomark
652
653 **Note:** Flexynesis can be trained with multiple target variables, which can be a mixture of regression/classification/survival tasks.
654
655 .. class:: infomark
656
657 **Note:** For the supervised tasks, the user can easily switch between different model architectures.
658
659 .. class:: infomark
660
661 **Note:** If you choose **MultiTripletNetwork** model, the first target variable should be a categorical variable.
662
663 .. class:: infomark
664
665 **Note:** If you choose **GNN** model, the features should have the same naming convention between different omics modalities.
666
667 .. class:: infomark
668
669 **Note:** The **GNN** model only works with genes (for example, CpG methylation sites do not work). The reason is that GNNs require a prior knowledge network, which is currently set to use the STRING database.
670
671 -----
672
673 **Unsupervised Training**
674
675 In the absence of any target variables or survival variables, you can use a VAE architecture to carry out unsupervised training.
676
677 -----
678
679 **Cross-modality Training**
680
681 We have implemented a special case of VAEs where the input data layers and output data layers can be set to different data modalities. The purpose of a cross-modality encoder is to learn embeddings that can translate from one data modality to another. The cross-modality encoder we implemented supports single or multiple input layers, and one or more target/survival variables can also be added to the model.
682
683 .. class:: infomark
684
685 **Note:** If you use the same input and output layers, it will be the same as unsupervised training.
686
687 -----
688
689 .. class:: infomark
690
691 **Modality fusion:**
692
693 Flexynesis currently supports two main ways of fusing different omics data modalities: (1) Early fusion: the input data matrices are first concatenated and then pushed through the networks. (2) Intermediate fusion: the input data matrices are first pushed through the networks to obtain modality-specific embedding spaces, which are then concatenated to serve as input for the supervisor MLPs.
694
695 .. _license: https://github.com/BIMSBbioinfo/flexynesis/blob/main/LICENSE
696 .. _Documentation: https://bimsbstatic.mdc-berlin.de/akalin/buyar/flexynesis/site/
697 .. _copyright holders: https://github.com/BIMSBbioinfo/flexynesis
698 </help>
699
700 <expand macro="citations" />
701 </tool>