comparison msi_classification.xml @ 0:f0b415eb3bcf draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_classification commit 8087490eb4dcaf4ead0f03eae4126780d21e5503
author galaxyp
date Fri, 06 Jul 2018 14:12:51 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f0b415eb3bcf
1 <tool id="mass_spectrometry_imaging_classification" name="MSI classification" version="1.10.0.0">
2 <description>spatial classification of mass spectrometry imaging data</description>
3 <requirements>
4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
5 <requirement type="package" version="2.2.1">r-gridextra</requirement>
6 <requirement type="package" version="0.20-35">r-lattice</requirement>
7 <requirement type="package" version="2.2.1">r-ggplot2</requirement>
8 </requirements>
9 <command detect_errors="exit_code">
10 <![CDATA[
11
12 #if $infile.ext == 'imzml'
13 ln -s '${infile.extra_files_path}/imzml' infile.imzML &&
14 ln -s '${infile.extra_files_path}/ibd' infile.ibd &&
15 #elif $infile.ext == 'analyze75'
16 ln -s '${infile.extra_files_path}/hdr' infile.hdr &&
17 ln -s '${infile.extra_files_path}/img' infile.img &&
18 ln -s '${infile.extra_files_path}/t2m' infile.t2m &&
19 #else
20 ln -s $infile infile.RData &&
21 #end if
22 cat '${MSI_segmentation}' &&
23 echo ${MSI_segmentation} &&
24 Rscript '${MSI_segmentation}'
25
26 ]]>
27 </command>
28 <configfiles>
29 <configfile name="MSI_segmentation"><![CDATA[
30
31
32 ################################# load libraries and read file #########################
33
34 library(Cardinal)
35 library(gridExtra)
36 library(lattice)
37 library(ggplot2)
38
39
40 #if $infile.ext == 'imzml'
41 #if str($processed_cond.processed_file) == "processed":
42 msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units")
43 #else
44 msidata <- readImzML('infile')
45 #end if
46 #elif $infile.ext == 'analyze75'
47 msidata = readAnalyze('infile')
48 #else
49 load('infile.RData')
50 #end if
51
52 ## function to later read RData reference files in
53
54 loadRData <- function(fileName){
55 #loads an RData file, and returns it
56 load(fileName)
57 get(ls()[ls() != "fileName"])
58 }
59
60 ## create full matrix to make processed imzML files compatible with classification
61 iData(msidata) <- iData(msidata)[]
62
63 ###################################### file properties in numbers ##############
64
65 ## Number of features (mz)
66 maxfeatures = length(features(msidata))
67 ## Range mz
68 minmz = round(min(mz(msidata)), digits=2)
69 maxmz = round(max(mz(msidata)), digits=2)
70 ## Number of spectra (pixels)
71 pixelcount = length(pixels(msidata))
72 ## Range x coordinates
73 minimumx = min(coord(msidata)[,1])
74 maximumx = max(coord(msidata)[,1])
75 ## Range y coordinates
76 minimumy = min(coord(msidata)[,2])
77 maximumy = max(coord(msidata)[,2])
78 ## Range of intensities
79 minint = round(min(spectra(msidata)[]), digits=2)
80 maxint = round(max(spectra(msidata)[]), digits=2)
81 medint = round(median(spectra(msidata)[]), digits=2)
82 ## Number of intensities > 0
83 npeaks= sum(spectra(msidata)[]>0)
84 ## Spectra multiplied with mz (potential number of peaks)
85 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
86 ## Percentage of intensities > 0
87 percpeaks = round(npeaks/numpeaks*100, digits=2)
88 ## Number of empty TICs
89 TICs = colSums(spectra(msidata)[])
90 NumemptyTIC = sum(TICs == 0)
91
92
93 ## Processing informations
94 processinginfo = processingData(msidata)
95 centroidedinfo = processinginfo@centroided # TRUE or FALSE
96
97 ## if TRUE write processinginfo if no write FALSE
98
99 ## normalization
100 if (length(processinginfo@normalization) == 0) {
101 normalizationinfo='FALSE'
102 } else {
103 normalizationinfo=processinginfo@normalization
104 }
105 ## smoothing
106 if (length(processinginfo@smoothing) == 0) {
107 smoothinginfo='FALSE'
108 } else {
109 smoothinginfo=processinginfo@smoothing
110 }
111 ## baseline
112 if (length(processinginfo@baselineReduction) == 0) {
113 baselinereductioninfo='FALSE'
114 } else {
115 baselinereductioninfo=processinginfo@baselineReduction
116 }
117 ## peak picking
118 if (length(processinginfo@peakPicking) == 0) {
119 peakpickinginfo='FALSE'
120 } else {
121 peakpickinginfo=processinginfo@peakPicking
122 }
123
124 #############################################################################
125
126 properties = c("Number of mz features",
127 "Range of mz values",
128 "Number of pixels",
129 "Range of x coordinates",
130 "Range of y coordinates",
131 "Range of intensities",
132 "Median of intensities",
133 "Intensities > 0",
134 "Number of empty spectra",
135 "Preprocessing",
136 "Normalization",
137 "Smoothing",
138 "Baseline reduction",
139 "Peak picking",
140 "Centroided")
141
142 values = c(paste0(maxfeatures),
143 paste0(minmz, " - ", maxmz),
144 paste0(pixelcount),
145 paste0(minimumx, " - ", maximumx),
146 paste0(minimumy, " - ", maximumy),
147 paste0(minint, " - ", maxint),
148 paste0(medint),
149 paste0(percpeaks, " %"),
150 paste0(NumemptyTIC),
151 paste0(" "),
152 paste0(normalizationinfo),
153 paste0(smoothinginfo),
154 paste0(baselinereductioninfo),
155 paste0(peakpickinginfo),
156 paste0(centroidedinfo))
157
158 property_df = data.frame(properties, values)
159
160
161 ######################################## PDF ###################################
162 ################################################################################
163 ################################################################################
164
165 Title = "Prediction"
166
167 #if str( $type_cond.type_method) == "training":
168 #if str( $type_cond.method_cond.class_method) == "PLS":
169 Title = "PLS"
170 #elif str( $type_cond.method_cond.class_method) == "OPLS":
171 Title = "OPLS"
172 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids":
173 Title = "SSC"
174 #end if
175 #end if
176
177 pdf("classificationpdf.pdf", fonts = "Times", pointsize = 12)
178 plot(0,type='n',axes=FALSE,ann=FALSE)
179
180
181 title(main=paste0(Title," for file: \n\n", "$infile.display_name"))
182
183
184
185 ##################### I) numbers and control plots #############################
186 ###############################################################################
187
188 ## table with values
189 grid.table(property_df, rows= NULL)
190
191 if (npeaks > 0){
192
193 opar <- par()
194
195 ######################## II) Training #############################
196 #############################################################################
197 #if str( $type_cond.type_method) == "training":
198 print("training")
199
200
201 ## load y response (will be needed in every training scenario)
202
203 #if str($type_cond.y_cond.y_vector) == "y_internal":
204 y_vector = msidata\$$type_cond.y_cond.y_name
205 #elif str($type_cond.y_cond.y_vector) == "y_external":
206 y_tabular = read.delim("$type_cond.y_cond.y_data", header = FALSE, stringsAsFactors = FALSE)
207 y_vector = as.factor(y_tabular[,$type_cond.y_cond.y_column])
208 number_pixels = length(y_vector) ## should be same as in data
209 #end if
210
211 ## plot of y vector
212
213 position_df = cbind(coord(msidata)[,1:2], y_vector)
214 y_plot = ggplot(position_df, aes(x=x, y=y, fill=y_vector))+
215 geom_tile() +
216 coord_fixed()+
217 ggtitle("Distribution of the response variable y")+
218 theme_bw()+
219 theme(text=element_text(family="ArialMT", face="bold", size=15))+
220 theme(legend.position="bottom",legend.direction="vertical")+
221 guides(fill=guide_legend(ncol=4,byrow=TRUE))
222 coord_labels = aggregate(cbind(x,y)~y_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
223 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$y_vector)
224 print(y_plot)
225
226
227 ######################## PLS #############################
228 #if str( $type_cond.method_cond.class_method) == "PLS":
229 print("PLS")
230
231 ######################## PLS - CV #############################
232 #if str( $type_cond.method_cond.analysis_cond.PLS_method) == "cvapply":
233 print("PLS cv")
234
235 ## folds
236 #if str($type_cond.method_cond.analysis_cond.fold_cond.fold_vector) == "fold_internal":
237
238 fold_vector = msidata\$$type_cond.method_cond.analysis_cond.fold_cond.fold_name
239 #elif str($type_cond.method_cond.analysis_cond.fold_cond.fold_vector) == "fold_external":
240 fold_tabular = read.delim("$type_cond.method_cond.analysis_cond.fold_cond.fold_data", header = FALSE, stringsAsFactors = FALSE)
241 fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.analysis_cond.fold_cond.fold_column])
242 number_pixels = length(fold_vector) ## should be same as in data
243 #end if
244
245 ## plot of folds
246
247 position_df = cbind(coord(msidata)[,1:2], fold_vector)
248 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+
249 geom_tile() +
250 coord_fixed()+
251 ggtitle("Distribution of the fold variable")+
252 theme_bw()+
253 theme(text=element_text(family="ArialMT", face="bold", size=15))+
254 theme(legend.position="bottom",legend.direction="vertical")+
255 guides(fill=guide_legend(ncol=4,byrow=TRUE))
256 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
257 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector)
258 print(fold_plot)
259
260 ## number of components
261 components = c($type_cond.method_cond.analysis_cond.plscv_comp)
262
263 ## PLS-cvApply:
264 msidata.cv.pls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components)
265
266 ## create table with summary
267 count = 1
268 summary_plscv = list()
269 accuracy_vector = numeric()
270 for (iteration in components){
271
272 summary_iteration = summary(msidata.cv.pls)\$accuracy[[paste0("ncomp = ", iteration)]]
273 summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table
274 accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot
275 empty_row = c(paste0("ncomp = ", iteration), rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration
276 ##rownames(labeled_iteration)[1] = paste0("ncomp = ", iteration)
277 ##labeled_iteration = cbind(rownames(labeled_iteration), labeled_iteration)
278 labeled_iteration = rbind(empty_row, summary_iteration)
279
280 summary_plscv[[count]] = labeled_iteration
281 count = count+1} ## create list with summary table for each component
282 ## create dataframe from list
283 summary_plscv = do.call(rbind, summary_plscv)
284 summary_df = as.data.frame(summary_plscv)
285 rownames(summary_df) = NULL
286
287 ## plots
288 ## plot to find ncomp with highest accuracy
289 plot(summary(msidata.cv.pls), main="Accuracy of PLS classification")
290 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy
291 ## one image for each sample/fold, 4 images per page
292 image(msidata.cv.pls, model = list(ncomp = ncomp_max), layout = c(2, 2))
293
294 par(opar)
295 ## print table with summary in pdf
296 plot(0,type='n',axes=FALSE,ann=FALSE)
297 title(main="Summary for the different components\n", adj=0.5)
298 ## summary for 4 components (20 rows) fits in one page:
299 if (length(components)<5){
300 grid.table(summary_df, rows= NULL)
301 }else{
302 grid.table(summary_df[1:20,], rows= NULL)
303 mincount = 21
304 maxcount = 40
305 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){
306 plot(0,type='n',axes=FALSE,ann=FALSE)
307 if (maxcount <= nrow(summary_df)){
308 grid.table(summary_df[mincount:maxcount,], rows= NULL)
309 mincount = mincount+20
310 maxcount = maxcount+20
311 }else{### stop last page with last sample otherwise NA in table
312 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)}
313 }
314 }
315
316 ## optional output as .RData
317 #if $output_rdata:
318 save(msidata.cv.pls, file="$classification_rdata")
319 #end if
320 ######################## PLS - analysis ###########################
321 #elif str( $type_cond.method_cond.analysis_cond.PLS_method) == "PLS_analysis":
322 print("PLS analysis")
323
324 ## number of components
325 component = c($type_cond.method_cond.analysis_cond.pls_comp)
326
327 ### pls analysis
328 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale)
329
330 ### plot of PLS coefficients
331 plot(msidata.pls, main="PLS coefficients per m/z")
332
333 ### summary table of PLS
334 summary_table = summary(msidata.pls)\$accuracy[[paste0("ncomp = ",component)]]
335 summary_table = cbind(rownames(summary_table), data.frame(summary_table))
336 rownames(summary_table) = NULL
337 print(summary_table)
338 ###plot(0,type='n',axes=FALSE,ann=FALSE)
339 ###grid.table(test, rows= TRUE)
340
341 ### image of the best m/z
342 print(image(msidata, mz = topLabels(msidata.pls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap"))
343
344 ## m/z and pixel information output
345 pls_classes = data.frame(msidata.pls\$classes[[1]])
346 rownames(pls_classes) = names(pixels(msidata))
347 colnames(pls_classes) = "predicted diagnosis"
348 pls_toplabels = topLabels(msidata.pls, n=$type_cond.method_cond.analysis_cond.pls_toplabels)
349
350 write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
351 write.table(pls_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
352
353 ## optional output as .RData
354 #if $output_rdata:
355 save(msidata.pls, file="$classification_rdata")
356 #end if
357
358 #end if
359
360
361 ######################## OPLS #############################
362 #elif str( $type_cond.method_cond.class_method) == "OPLS":
363 print("OPLS")
364
365 ######################## OPLS -CV #############################
366 #if str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_cvapply":
367 print("OPLS cv")
368
369 ## folds
370 #if str($type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_vector) == "opls_fold_internal":
371 fold_vector = msidata\$$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_name
372 #elif str($type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_vector) == "opls_fold_external":
373 fold_tabular = read.delim("$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_data", header = FALSE, stringsAsFactors = FALSE)
374 fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_column])
375 number_pixels = length(fold_vector) ## should be same as in data
376 #end if
377
378 ## plot of folds
379
380 position_df = cbind(coord(msidata)[,1:2], fold_vector)
381 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+
382 geom_tile() +
383 coord_fixed()+
384 ggtitle("Distribution of the fold variable")+
385 theme_bw()+
386 theme(text=element_text(family="ArialMT", face="bold", size=15))+
387 theme(legend.position="bottom",legend.direction="vertical")+
388 guides(fill=guide_legend(ncol=4,byrow=TRUE))
389 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
390 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector)
391 print(fold_plot)
392
393 ## number of components
394 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp)
395
396 ## OPLS-cvApply:
397 msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv)
398
399 ## create table with summary
400 count = 1
401 summary_oplscv = list()
402 accuracy_vector = numeric()
403 for (iteration in components){
404 summary_iteration = summary(msidata.cv.opls)\$accuracy[[paste0("ncomp = ", iteration)]]
405 summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table
406 accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot
407 empty_row = c(paste0("ncomp = ", iteration), rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration
408 ##rownames(labeled_iteration)[1] = paste0("ncomp = ", iteration)
409 ##labeled_iteration = cbind(rownames(labeled_iteration), labeled_iteration)
410 labeled_iteration = rbind(empty_row, summary_iteration)
411 summary_oplscv[[count]] = labeled_iteration ## create list with summary table for each component
412 count = count+1}
413 ## create dataframe from list
414 summary_oplscv = do.call(rbind, summary_oplscv)
415 summary_df = as.data.frame(summary_oplscv)
416 rownames(summary_df) = NULL
417
418 ## plots
419 ## plot to find ncomp with highest accuracy
420 plot(summary(msidata.cv.opls), main="Accuracy of OPLS classification")
421 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy
422 ## one image for each sample/fold, 4 images per page
423 image(msidata.cv.opls, model = list(ncomp = ncomp_max), layout = c(2, 2))
424
425 par(opar)
426 ## print table with summary in pdf
427 plot(0,type='n',axes=FALSE,ann=FALSE)
428 title(main="Summary for the different components\n", adj=0.5)
429 ## summary for 4 components (20 rows) fits in one page:
430 if (length(components)<5){
431 grid.table(summary_df, rows= NULL)
432 }else{
433 grid.table(summary_df[1:20,], rows= NULL)
434 mincount = 21
435 maxcount = 40
436 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){
437 plot(0,type='n',axes=FALSE,ann=FALSE)
438 if (maxcount <= nrow(summary_df)){
439 grid.table(summary_df[mincount:maxcount,], rows= NULL)
440 mincount = mincount+20
441 maxcount = maxcount+20
442 }else{### stop last page with last sample otherwise NA in table
443 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)}
444 }
445 }
446
447 ## optional output as .RData
448 #if $output_rdata:
449 save(msidata.cv.opls, file="$classification_rdata")
450 #end if
451
452 ######################## OPLS -analysis ###########################
453 #elif str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_analysis":
454 print("OPLS analysis")
455
456 ## number of components
457 component = c($type_cond.method_cond.opls_analysis_cond.opls_comp)
458
459 ### opls analysis
460 msidata.opls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew)
461
462 ### plot of OPLS coefficients
463 plot(msidata.opls, main="OPLS coefficients per m/z")
464
465 ### summary table of OPLS
466 summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]]
467 summary_table = cbind(rownames(summary_table), summary_table)
468 rownames(summary_table) = NULL
469 summary_table = data.frame(summary_table)
470 print(summary_table)
471 ###plot(0,type='n',axes=FALSE,ann=FALSE)
472 ###grid.table(test, rows= TRUE)
473
474 ### image of the best m/z
475 print(image(msidata, mz = topLabels(msidata.opls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap"))
476
477 ## m/z and pixel information output
478 opls_classes = data.frame(msidata.opls\$classes[[1]])
479 rownames(opls_classes) = names(pixels(msidata))
480 colnames(opls_classes) = "predicted diagnosis"
481 opls_toplabels = topLabels(msidata.opls, n=$type_cond.method_cond.opls_analysis_cond.opls_toplabels)
482
483 write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
484 write.table(opls_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
485
486 ## optional output as .RData
487 #if $output_rdata:
488 save(msidata.opls, file="$classification_rdata")
489 #end if
490
491 #end if
492
493 ######################## SSC #############################
494 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids":
495 print("SSC")
496
497 ######################## SSC - CV #############################
498 #if str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_cvapply":
499 print("SSC cv")
500
501 ## folds
502 #if str($type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_vector) == "ssc_fold_internal":
503 fold_vector = msidata\$$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_name
504
505 #elif str($type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_vector) == "ssc_fold_external":
506 fold_tabular = read.delim("$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_data", header = FALSE, stringsAsFactors = FALSE)
507 fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_column])
508 number_pixels = length(fold_vector) ## should be same as in data
509 #end if
510
511 ## plot of folds
512 position_df = cbind(coord(msidata)[,1:2], fold_vector)
513 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+
514 geom_tile() +
515 coord_fixed()+
516 ggtitle("Distribution of the fold variable")+
517 theme_bw()+
518 theme(text=element_text(family="ArialMT", face="bold", size=15))+
519 theme(legend.position="bottom",legend.direction="vertical")+
520 guides(fill=guide_legend(ncol=4,byrow=TRUE))
521 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
522 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector)
523 print(fold_plot)
524
525 ## SSC-cvApply:
526 msidata.cv.ssc <- cvApply(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method")
527
528 ## create table with summary
529 count = 1
530 summary_ssccv = list()
531 accuracy_vector = numeric()
532
533 for (iteration in names(msidata.cv.ssc@resultData[[1]][,1])){
534 summary_iteration = summary(msidata.cv.ssc)\$accuracy[[iteration]]
535 summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table
536 accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot
537 empty_row = c(iteration, rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration
538 labeled_iteration = rbind(empty_row, summary_iteration)
539 summary_ssccv[[count]] = labeled_iteration ## create list with summary table for each component
540 count = count+1
541 }
542
543 ##create dataframe from list
544 summary_ssccv = do.call(rbind, summary_ssccv)
545 summary_df = as.data.frame(summary_ssccv)
546 rownames(summary_df) = NULL
547
548 ## plot to find parameters with highest accuracy
549 plot(summary(msidata.cv.ssc), main="Accuracy of SSC classification")
550 best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy
551 r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4))
552 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space
553
554 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), layout=c(2,2))
555
556 par(opar)
557 ## print table with summary in pdf
558 plot(0,type='n',axes=FALSE,ann=FALSE)
559 title(main="Summary for the different parameters\n", adj=0.5)
560 ## summary for 4 parameters (20 rows) fits in one page:
561 if (length(names(msidata.cv.ssc@resultData[[1]][,1]))<5){
562 grid.table(summary_df, rows= NULL)
563 }else{
564 grid.table(summary_df[1:20,], rows= NULL)
565 mincount = 21
566 maxcount = 40
567 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){
568 plot(0,type='n',axes=FALSE,ann=FALSE)
569 if (maxcount <= nrow(summary_df)){
570 grid.table(summary_df[mincount:maxcount,], rows= NULL)
571 mincount = mincount+20
572 maxcount = maxcount+20
573 }else{### stop last page with last sample otherwise NA in table
574 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)}
575 }
576 }
577
578 ## optional output as .RData
579 #if $output_rdata:
580 save(msidata.cv.opls, file="$classification_rdata")
581 #end if
582
583 ######################## SSC -analysis ###########################
584 #elif str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_analysis":
585 print("SSC analysis")
586
587 ## SSC analysis
588 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, .fold = fold_vector,
589 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method")
590
591 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s)))
592
593 ### summary table SSC
594
595 ##summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]]
596 summary_table = summary(msidata.ssc)
597 print(summary_table)
598 ##summary_table = cbind(rownames(summary_table), summary_table)
599 ##rownames(summary_table) = NULL
600
601 ###plot(0,type='n',axes=FALSE,ann=FALSE)
602 ###grid.table(summary_table, rows= TRUE)
603
604 ### image of the best m/z
605 print(image(msidata, mz = topLabels(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap"))
606
607 ## m/z and pixel information output
608 ssc_classes = data.frame(msidata.ssc\$classes[[1]])
609 rownames(ssc_classes) = names(pixels(msidata))
610 colnames(ssc_classes) = "predicted diagnosis"
611 ssc_toplabels = topLabels(msidata.ssc)
612
613 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
614 write.table(ssc_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
615
616 ## optional output as .RData
617 #if $output_rdata:
618 save(msidata.ssc, file="$classification_rdata")
619 #end if
620
621 #end if
622 #end if
623
624
625 ######################## II) Prediction #############################
626 #############################################################################
627 #elif str( $type_cond.type_method) == "prediction":
628 print("prediction")
629
630 #if str($type_cond.new_y.new_y_values) == "no_new_y":
631 new_y_vector = FALSE
632 #elif str($type_cond.new_y.new_y_values) == "new_y_internal":
633 new_y_vector = msidata\$$type_cond.new_y.new_y_name
634 #elif str($type_cond.new_y.new_y_values) == "new_y_external":
635
636 new_y_tabular = read.delim("$type_cond.new_y.new_y_data", header = FALSE, stringsAsFactors = FALSE)
637 new_y_vector = new_y_tabular[,$type_cond.new_y.new_y_column]
638 number_pixels = length(new_y_vector) ## should be same as in data
639 #end if
640
641 training_data = loadRData("$type_cond.training_result")
642 prediction = predict(training_data,msidata, newy = new_y_vector)
643
644 ## optional output as .RData
645 #if $output_rdata:
646 msidata = prediction
647 save(msidata, file="$classification_rdata")
648 #end if
649 #end if
650
651 dev.off()
652 }else{
653 print("Inputfile has no intensities > 0")
654 dev.off()
655 }
656
657 ]]></configfile>
658 </configfiles>
659 <inputs>
660 <param name="infile" type="data" format="imzml, rdata, analyze75"
661 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
662 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
663 <conditional name="processed_cond">
664 <param name="processed_file" type="select" label="Is the input file a processed imzML file ">
665 <option value="no_processed" selected="True">not a processed imzML</option>
666 <option value="processed">processed imzML</option>
667 </param>
668 <when value="no_processed"/>
669 <when value="processed">
670 <param name="accuracy" type="float" value="50" label="Mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/>
671 <param name="units" display="radio" type="select" label="Unit of the mass accuracy" help="either m/z or ppm">
672 <option value="mz" >mz</option>
673 <option value="ppm" selected="True" >ppm</option>
674 </param>
675 </when>
676 </conditional>
677
678 <conditional name="type_cond">
679 <param name="type_method" type="select" label="Analysis step to perform">
680 <option value="training" selected="True">training</option>
681 <option value="prediction">prediction</option>
682 </param>
683 <when value="training">
684
685 <conditional name="method_cond">
686 <param name="class_method" type="select" label="Select the method for classification">
687 <option value="PLS" selected="True">PLS</option>
688 <option value="OPLS">OPLS</option>
689 <option value="spatialShrunkenCentroids">spatial shrunken centroids</option>
690 </param>
691 <when value="PLS">
692
693 <conditional name="analysis_cond">
694 <param name="PLS_method" type="select" label="Crossvalidation or analysis">
695 <option value="cvapply" selected="True">cvApply</option>
696 <option value="PLS_analysis">PLS analysis</option>
697 </param>
698 <when value="cvapply">
699
700 <param name="plscv_comp" type="text" value="1:2"
701 label="The number of PLS components" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
702 <conditional name="fold_cond">
703 <param name="fold_vector" type="select" label="Define the fold variable">
704 <option value="fold_internal" selected="True">dataset contains already fold</option>
705 <option value="fold_external">use fold from tabular file</option>
706 </param>
707 <when value="fold_internal">
708 <param name="fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/>
709 </when>
710 <when value="fold_external">
711 <param name="fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/>
712 <param name="fold_column" data_ref="fold_data" label="Column with folds" type="data_column"/>
713 </when>
714 </conditional>
715 </when>
716
717 <when value="PLS_analysis">
718 <param name="pls_comp" type="integer" value="5"
719 label="The optimal number of PLS components as indicated by cross-validations" help="Run cvApply first to optain optiaml number of PLS components"/>
720 <param name="pls_scale" type="boolean" display="radio" label="data scaling" truevalue="TRUE" falsevalue="FALSE"/>
721 <param name="pls_toplabels" type="integer" value="100"
722 label="Number of toplabels (masses) which should be written in tabular output"/>
723 </when>
724 </conditional>
725 </when>
726
727 <when value="OPLS">
728
729 <conditional name="opls_analysis_cond">
730 <param name="opls_method" type="select" label="Analysis step to perform">
731 <option value="opls_cvapply" selected="True">cvApply</option>
732 <option value="opls_analysis">OPLS analysis</option>
733 </param>
734
735 <when value="opls_cvapply">
736 <param name="opls_cvcomp" type="text" value="1:2"
737 label="The number of OPLS components" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
738 <param name="xnew_cv" type="boolean" display="radio" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/>
739 <conditional name="opls_fold_cond">
740 <param name="opls_fold_vector" type="select" label="Define the fold variable">
741 <option value="opls_fold_internal" selected="True">dataset contains already fold</option>
742 <option value="opls_fold_external">use fold from tabular file</option>
743 </param>
744 <when value="opls_fold_internal">
745 <param name="opls_fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/>
746 </when>
747 <when value="opls_fold_external">
748 <param name="opls_fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/>
749 <param name="opls_fold_column" data_ref="opls_fold_data" label="Column with folds" type="data_column"/>
750 </when>
751 </conditional>
752 </when>
753
754 <when value="opls_analysis">
755 <param name="opls_comp" type="integer" value="5"
756 label="The optimal number of PLS components as indicated by cross-validations" help="Run cvApply first to optain optiaml number of PLS components"/>
757 <param name="xnew" type="boolean" display="radio" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/>
758 <param name="opls_scale" type="select" label="data scaling" display="radio" optional="False">
759 <option value="TRUE">yes</option>
760 <option value="FALSE" selected="True">no</option>
761 </param>
762 <param name="opls_toplabels" type="integer" value="100"
763 label="Number of toplabels (features) which should be written in tabular output"/>
764 </when>
765 </conditional>
766 </when>
767
768 <when value="spatialShrunkenCentroids">
769 <conditional name="ssc_analysis_cond">
770 <param name="ssc_method" type="select" label="Analysis step to perform">
771 <option value="ssc_cvapply" selected="True">cvApply</option>
772 <option value="ssc_analysis">spatial shrunken centroids analysis</option>
773 </param>
774 <when value="ssc_cvapply">
775
776 <conditional name="ssc_fold_cond">
777 <param name="ssc_fold_vector" type="select" label="Define the fold variable">
778 <option value="ssc_fold_internal" selected="True">dataset contains already fold</option>
779 <option value="ssc_fold_external">use fold from tabular file</option>
780 </param>
781 <when value="ssc_fold_internal">
782 <param name="ssc_fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/>
783 </when>
784 <when value="ssc_fold_external">
785 <param name="ssc_fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/>
786 <param name="ssc_fold_column" data_ref="ssc_fold_data" label="Column with folds" type="data_column"/>
787 </when>
788 </conditional>
789 </when>
790
791 <when value="ssc_analysis">
792
793 <param name="ssc_toplabels" type="integer" value="100"
794 label="Number of toplabels (features) which should be written in tabular output"/>
795 </when>
796 </conditional>
797 <param name="ssc_r" type="text" value="2"
798 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
799 <param name="ssc_s" type="text" value="2"
800 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
801 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights">
802 <option value="gaussian">gaussian</option>
803 <option value="adaptive" selected="True">adaptive</option>
804 </param>
805
806 </when>
807 </conditional>
808 <conditional name="y_cond">
809 <param name="y_vector" type="select" label="Define the response variable y">
810 <option value="y_internal" selected="True">dataset contains already y</option>
811 <option value="y_external">use y from tabular file</option>
812 </param>
813 <when value="y_internal">
814 <param name="y_name" type="text" value="combined_sample" label="Name of the pData slot where y is stored" help="Outputs of MSI_combine tool have 'combined_sample' as name"/>
815 </when>
816 <when value="y_external">
817 <param name="y_data" type="data" format="tabular" label="Tabular file with column for y response"/>
818 <param name="y_column" data_ref="y_data" label="Column with y response" type="data_column"/>
819 </when>
820 </conditional>
821 </when>
822
823 <when value="prediction">
824 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/>
825 <conditional name="new_y">
826 <param name="new_y_values" type="select" label="Define the new response y">
827 <option value="no_new_y" >no new y response</option>
828 <option value="new_y_internal" selected="True">dataset contains already y</option>
829 <option value="new_y_external">use y from tabular file</option>
830 </param>
831 <when value="no_new_y"/>
832 <when value="new_y_internal">
833 <param name="new_y_name" type="text" value="combined_sample" label="Name of the pData slot where y is stored" help="data merged with MSI_combine tool has 'combined_sample' as name"/>
834 </when>
835
836 <when value="new_y_external">
837 <param name="new_y_data" type="data" format="tabular" label="Tabular file with column for y response"/>
838 <param name="new_y_column" data_ref="new_y_data" label="Column with y response" type="data_column"/>
839 </when>
840 </conditional>
841 </when>
842 </conditional>
843 <param name="output_rdata" type="boolean" display="radio" label="Results as .RData output"/>
844 </inputs>
845 <outputs>
846 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "$infile.display_name classification"/>
847 <data format="tabular" name="mzfeatures" label="$infile.display_name features"/>
848 <data format="tabular" name="pixeloutput" label="$infile.display_name pixels"/>
849 <data format="rdata" name="classification_rdata" label="$infile.display_name classification">
850 <filter>output_rdata</filter>
851 </data>
852 </outputs>
853 <tests>
854 <test expect_num_outputs="3">
855 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
856 <conditional name="type_cond">
857 <param name="type_method" value="training"/>
858 <conditional name="method_cond">
859 <param name="class_method" value="PLS"/>
860 <conditional name="analysis_cond">
861 <param name="PLS_method" value="cvapply"/>
862
863 <param name="plscv_comp" value="2:4"/>
864 <conditional name="fold_cond">
865 <param name="fold_vector" value="fold_external"/>
866 <param name="fold_data" value="pixel_annotation_file1.tabular" ftype="tabular"/>
867 <param name="fold_column" value="1"/>
868 </conditional>
869
870 </conditional>
871 </conditional>
872 <conditional name="y_cond">
873 <param name="y_vector" value="y_external"/>
874 <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/>
875 <param name="y_column" value="2"/>
876 </conditional>
877 </conditional>
878 <output name="mzfeatures" file="features_test1.tabular"/>
879 <output name="pixeloutput" file="pixels_test1.tabular"/>
880 <output name="classification_images" file="test1.pdf" compare="sim_size" delta="20000"/>
881 </test>
882
883 <test expect_num_outputs="4">
884 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
885 <conditional name="type_cond">
886 <param name="type_method" value="training"/>
887 <conditional name="method_cond">
888 <param name="class_method" value="PLS"/>
889 <conditional name="analysis_cond">
890 <param name="PLS_method" value="PLS_analysis"/>
891
892 <param name="pls_comp" value="2"/>
893 <param name="pls_scale" value="TRUE"/>
894 <param name="pls_toplabels" value="100"/>
895 <conditional name="fold_cond">
896 <param name="fold_vector" value="fold_external"/>
897 <param name="fold_data" value="pixel_annotation_file1.tabular" ftype="tabular"/>
898 <param name="fold_column" value="1"/>
899 </conditional>
900
901 </conditional>
902 </conditional>
903 <conditional name="y_cond">
904 <param name="y_vector" value="y_external"/>
905 <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/>
906 <param name="y_column" value="2"/>
907 </conditional>
908 </conditional>
909 <param name="output_rdata" value="True"/>
910 <output name="mzfeatures" file="features_test2.tabular"/>
911 <output name="pixeloutput" file="pixels_test2.tabular"/>
912 <output name="classification_images" file="test2.pdf" compare="sim_size" delta="20000"/>
913 <output name="classification_rdata" file="test2.rdata" compare="sim_size" />
914 </test>
915
916 <test expect_num_outputs="3">
917 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
918 <conditional name="type_cond">
919 <param name="type_method" value="training"/>
920 <conditional name="method_cond">
921 <param name="class_method" value="OPLS"/>
922 <conditional name="opls_analysis_cond">
923 <param name="opls_method" value="opls_analysis"/>
924
925 <param name="opls_cvcomp" value="1:2"/>
926 <param name="xnew_cv" value="FALSE"/>
927 <conditional name="opls_fold_cond">
928 <param name="opls_fold_vector" value="opls_fold_external"/>
929 <param name="opls_fold_data" ftype="tabular" value="random_factors.tabular"/>
930 <param name="opls_fold_column" value="1"/>
931 </conditional>
932 </conditional>
933 </conditional>
934 <conditional name="y_cond">
935 <param name="y_vector" value="y_external"/>
936 <param name="y_data" value="random_factors.tabular" ftype="tabular"/>
937 <param name="y_column" value="2"/>
938 </conditional>
939 </conditional>
940 <output name="mzfeatures" file="features_test3.tabular"/>
941 <output name="pixeloutput" file="pixels_test3.tabular"/>
942 <output name="classification_images" file="test3.pdf" compare="sim_size" delta="20000"/>
943 </test>
944
945 <test expect_num_outputs="4">
946 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
947 <conditional name="type_cond">
948 <param name="type_method" value="training"/>
949 <conditional name="method_cond">
950 <param name="class_method" value="OPLS"/>
951 <conditional name="opls_analysis_cond">
952
953 <param name="opls_method" value="opls_analysis"/>
954 <param name="opls_comp" value="3"/>
955 <param name="xnew" value="FALSE"/>
956 <param name="opls_scale" value="FALSE"/>
957 <param name="opls_toplabels" value="100"/>
958 </conditional>
959
960 </conditional>
961 <conditional name="y_cond">
962 <param name="y_vector" value="y_external"/>
963 <param name="y_data" value="random_factors.tabular" ftype="tabular"/>
964 <param name="y_column" value="2"/>
965 </conditional>
966 </conditional>
967 <param name="output_rdata" value="True"/>
968 <output name="mzfeatures" file="features_test4.tabular"/>
969 <output name="pixeloutput" file="pixels_test4.tabular"/>
970 <output name="classification_images" file="test4.pdf" compare="sim_size" delta="20000"/>
971 <output name="classification_rdata" file="test4.rdata" compare="sim_size" />
972 </test>
973
974 <test expect_num_outputs="3">
975 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
976 <conditional name="type_cond">
977 <param name="type_method" value="training"/>
978 <conditional name="method_cond">
979 <param name="class_method" value="spatialShrunkenCentroids"/>
980 <conditional name="ssc_analysis_cond">
981 <param name="ssc_method" value="ssc_cvapply"/>
982 <conditional name="ssc_fold_cond">
983 <param name="ssc_fold_vector" value="ssc_fold_external"/>
984 <param name="ssc_fold_data" value="pixel_annotation_file1.tabular" ftype="tabular"/>
985 <param name="ssc_fold_column" value="1"/>
986 </conditional>
987 <param name="ssc_r" value="1:2"/>
988 <param name="ssc_s" value="2:3"/>
989 <param name="ssc_kernel_method" value="adaptive"/>
990 </conditional>
991 </conditional>
992 <conditional name="y_cond">
993 <param name="y_vector" value="y_external"/>
994 <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/>
995 <param name="y_column" value="2"/>
996 </conditional>
997 </conditional>
998 <output name="mzfeatures" file="features_test5.tabular"/>
999 <output name="pixeloutput" file="pixels_test5.tabular"/>
1000 <output name="classification_images" file="test5.pdf" compare="sim_size" delta="20000"/>
1001 </test>
1002
1003 <test expect_num_outputs="4">
1004 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
1005 <conditional name="type_cond">
1006 <param name="type_method" value="training"/>
1007 <conditional name="method_cond">
1008 <param name="class_method" value="spatialShrunkenCentroids"/>
1009 <conditional name="ssc_analysis_cond">
1010 <param name="ssc_method" value="ssc_analysis"/>
1011 <param name="ssc_toplabels" value="100"/>
1012 </conditional>
1013 <param name="ssc_r" value="2"/>
1014 <param name="ssc_s" value="2"/>
1015 <param name="ssc_kernel_method" value="adaptive"/>
1016 </conditional>
1017 <conditional name="y_cond">
1018 <param name="y_vector" value="y_external"/>
1019 <param name="y_data" value="random_factors.tabular" ftype="tabular"/>
1020 <param name="y_column" value="2"/>
1021 </conditional>
1022 </conditional>
1023 <param name="output_rdata" value="True"/>
1024 <output name="mzfeatures" file="features_test6.tabular"/>
1025 <output name="pixeloutput" file="pixels_test6.tabular"/>
1026 <output name="classification_images" file="test6.pdf" compare="sim_size" delta="20000"/>
1027 <output name="classification_rdata" file="test6.rdata" compare="sim_size" />
1028 </test>
1029
1030 <test expect_num_outputs="4">
1031 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
1032 <conditional name="type_cond">
1033 <param name="type_method" value="prediction"/>
1034 <param name="training_result" value="test2.rdata" ftype="rdata"/>
1035 <conditional name="new_y">
1036 <param name="new_y_values" value="new_y_external"/>
1037 <param name="new_y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/>
1038 <param name="new_y_column" value="2"/>
1039 </conditional>
1040 </conditional>
1041 <param name="output_rdata" value="True"/>
1042 <output name="mzfeatures" file="features_test7.tabular"/>
1043 <output name="pixeloutput" file="pixels_test7.tabular"/>
1044 <output name="classification_images" file="test7.pdf" compare="sim_size" delta="20000"/>
1045 <output name="classification_rdata" file="test7.rdata" compare="sim_size" />
1046 </test>
1047
1048 </tests>
1049 <help>
1050 <![CDATA[
1051
1052 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_
1053
1054 This tool provides three different Cardinal functions for supervised classification of mass-spectrometry imaging data.
1055
1056 Input data: 3 types of input data can be used:
1057
1058 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
1059 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
1060 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
1061
1062 Options:
1063
1064 - PLS(-DA): partial least square (discriminant analysis)
1065 - O-PLS(-DA): Orthogonal partial least squares (discriminant analysis)
1066 - Spatial shrunken centroids
1067
1068 Output:
1069
1070 - Pdf with the heatmaps and plots for the classification
1071 - Tabular file with information on masses and pixels: toplabels/classes (PLS, spatial shrunken centroids)
1072 - optional RData output to further explore the results with Cardinal in R
1073
1074 ]]>
1075 </help>
1076 <citations>
1077 <citation type="doi">10.1093/bioinformatics/btv146</citation>
1078 </citations>
1079 </tool>