Mercurial > repos > galaxyp > msi_classification
comparison msi_classification.xml @ 0:f0b415eb3bcf draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_classification commit 8087490eb4dcaf4ead0f03eae4126780d21e5503
author | galaxyp |
---|---|
date | Fri, 06 Jul 2018 14:12:51 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f0b415eb3bcf |
---|---|
1 <tool id="mass_spectrometry_imaging_classification" name="MSI classification" version="1.10.0.0"> | |
2 <description>spatial classification of mass spectrometry imaging data</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> | |
5 <requirement type="package" version="2.2.1">r-gridextra</requirement> | |
6 <requirement type="package" version="0.20-35">r-lattice</requirement> | |
7 <requirement type="package" version="2.2.1">r-ggplot2</requirement> | |
8 </requirements> | |
9 <command detect_errors="exit_code"> | |
10 <![CDATA[ | |
11 | |
12 #if $infile.ext == 'imzml' | |
13 ln -s '${infile.extra_files_path}/imzml' infile.imzML && | |
14 ln -s '${infile.extra_files_path}/ibd' infile.ibd && | |
15 #elif $infile.ext == 'analyze75' | |
16 ln -s '${infile.extra_files_path}/hdr' infile.hdr && | |
17 ln -s '${infile.extra_files_path}/img' infile.img && | |
18 ln -s '${infile.extra_files_path}/t2m' infile.t2m && | |
19 #else | |
20 ln -s $infile infile.RData && | |
21 #end if | |
22 cat '${MSI_segmentation}' && | |
23 echo ${MSI_segmentation} && | |
24 Rscript '${MSI_segmentation}' | |
25 | |
26 ]]> | |
27 </command> | |
28 <configfiles> | |
29 <configfile name="MSI_segmentation"><![CDATA[ | |
30 | |
31 | |
32 ################################# load libraries and read file ######################### | |
33 | |
34 library(Cardinal) | |
35 library(gridExtra) | |
36 library(lattice) | |
37 library(ggplot2) | |
38 | |
39 | |
40 #if $infile.ext == 'imzml' | |
41 #if str($processed_cond.processed_file) == "processed": | |
42 msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") | |
43 #else | |
44 msidata <- readImzML('infile') | |
45 #end if | |
46 #elif $infile.ext == 'analyze75' | |
47 msidata = readAnalyze('infile') | |
48 #else | |
49 load('infile.RData') | |
50 #end if | |
51 | |
52 ## function to later read RData reference files in | |
53 | |
54 loadRData <- function(fileName){ | |
55 #loads an RData file, and returns it | |
56 load(fileName) | |
57 get(ls()[ls() != "fileName"]) | |
58 } | |
59 | |
60 ## create full matrix to make processed imzML files compatible with classification | |
61 iData(msidata) <- iData(msidata)[] | |
62 | |
63 ###################################### file properties in numbers ############## | |
64 | |
65 ## Number of features (mz) | |
66 maxfeatures = length(features(msidata)) | |
67 ## Range mz | |
68 minmz = round(min(mz(msidata)), digits=2) | |
69 maxmz = round(max(mz(msidata)), digits=2) | |
70 ## Number of spectra (pixels) | |
71 pixelcount = length(pixels(msidata)) | |
72 ## Range x coordinates | |
73 minimumx = min(coord(msidata)[,1]) | |
74 maximumx = max(coord(msidata)[,1]) | |
75 ## Range y coordinates | |
76 minimumy = min(coord(msidata)[,2]) | |
77 maximumy = max(coord(msidata)[,2]) | |
78 ## Range of intensities | |
79 minint = round(min(spectra(msidata)[]), digits=2) | |
80 maxint = round(max(spectra(msidata)[]), digits=2) | |
81 medint = round(median(spectra(msidata)[]), digits=2) | |
82 ## Number of intensities > 0 | |
83 npeaks= sum(spectra(msidata)[]>0) | |
84 ## Spectra multiplied with mz (potential number of peaks) | |
85 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | |
86 ## Percentage of intensities > 0 | |
87 percpeaks = round(npeaks/numpeaks*100, digits=2) | |
88 ## Number of empty TICs | |
89 TICs = colSums(spectra(msidata)[]) | |
90 NumemptyTIC = sum(TICs == 0) | |
91 | |
92 | |
93 ## Processing informations | |
94 processinginfo = processingData(msidata) | |
95 centroidedinfo = processinginfo@centroided # TRUE or FALSE | |
96 | |
97 ## if TRUE write processinginfo if no write FALSE | |
98 | |
99 ## normalization | |
100 if (length(processinginfo@normalization) == 0) { | |
101 normalizationinfo='FALSE' | |
102 } else { | |
103 normalizationinfo=processinginfo@normalization | |
104 } | |
105 ## smoothing | |
106 if (length(processinginfo@smoothing) == 0) { | |
107 smoothinginfo='FALSE' | |
108 } else { | |
109 smoothinginfo=processinginfo@smoothing | |
110 } | |
111 ## baseline | |
112 if (length(processinginfo@baselineReduction) == 0) { | |
113 baselinereductioninfo='FALSE' | |
114 } else { | |
115 baselinereductioninfo=processinginfo@baselineReduction | |
116 } | |
117 ## peak picking | |
118 if (length(processinginfo@peakPicking) == 0) { | |
119 peakpickinginfo='FALSE' | |
120 } else { | |
121 peakpickinginfo=processinginfo@peakPicking | |
122 } | |
123 | |
124 ############################################################################# | |
125 | |
126 properties = c("Number of mz features", | |
127 "Range of mz values", | |
128 "Number of pixels", | |
129 "Range of x coordinates", | |
130 "Range of y coordinates", | |
131 "Range of intensities", | |
132 "Median of intensities", | |
133 "Intensities > 0", | |
134 "Number of empty spectra", | |
135 "Preprocessing", | |
136 "Normalization", | |
137 "Smoothing", | |
138 "Baseline reduction", | |
139 "Peak picking", | |
140 "Centroided") | |
141 | |
142 values = c(paste0(maxfeatures), | |
143 paste0(minmz, " - ", maxmz), | |
144 paste0(pixelcount), | |
145 paste0(minimumx, " - ", maximumx), | |
146 paste0(minimumy, " - ", maximumy), | |
147 paste0(minint, " - ", maxint), | |
148 paste0(medint), | |
149 paste0(percpeaks, " %"), | |
150 paste0(NumemptyTIC), | |
151 paste0(" "), | |
152 paste0(normalizationinfo), | |
153 paste0(smoothinginfo), | |
154 paste0(baselinereductioninfo), | |
155 paste0(peakpickinginfo), | |
156 paste0(centroidedinfo)) | |
157 | |
158 property_df = data.frame(properties, values) | |
159 | |
160 | |
161 ######################################## PDF ################################### | |
162 ################################################################################ | |
163 ################################################################################ | |
164 | |
165 Title = "Prediction" | |
166 | |
167 #if str( $type_cond.type_method) == "training": | |
168 #if str( $type_cond.method_cond.class_method) == "PLS": | |
169 Title = "PLS" | |
170 #elif str( $type_cond.method_cond.class_method) == "OPLS": | |
171 Title = "OPLS" | |
172 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids": | |
173 Title = "SSC" | |
174 #end if | |
175 #end if | |
176 | |
177 pdf("classificationpdf.pdf", fonts = "Times", pointsize = 12) | |
178 plot(0,type='n',axes=FALSE,ann=FALSE) | |
179 | |
180 | |
181 title(main=paste0(Title," for file: \n\n", "$infile.display_name")) | |
182 | |
183 | |
184 | |
185 ##################### I) numbers and control plots ############################# | |
186 ############################################################################### | |
187 | |
188 ## table with values | |
189 grid.table(property_df, rows= NULL) | |
190 | |
191 if (npeaks > 0){ | |
192 | |
193 opar <- par() | |
194 | |
195 ######################## II) Training ############################# | |
196 ############################################################################# | |
197 #if str( $type_cond.type_method) == "training": | |
198 print("training") | |
199 | |
200 | |
201 ## load y response (will be needed in every training scenario) | |
202 | |
203 #if str($type_cond.y_cond.y_vector) == "y_internal": | |
204 y_vector = msidata\$$type_cond.y_cond.y_name | |
205 #elif str($type_cond.y_cond.y_vector) == "y_external": | |
206 y_tabular = read.delim("$type_cond.y_cond.y_data", header = FALSE, stringsAsFactors = FALSE) | |
207 y_vector = as.factor(y_tabular[,$type_cond.y_cond.y_column]) | |
208 number_pixels = length(y_vector) ## should be same as in data | |
209 #end if | |
210 | |
211 ## plot of y vector | |
212 | |
213 position_df = cbind(coord(msidata)[,1:2], y_vector) | |
214 y_plot = ggplot(position_df, aes(x=x, y=y, fill=y_vector))+ | |
215 geom_tile() + | |
216 coord_fixed()+ | |
217 ggtitle("Distribution of the response variable y")+ | |
218 theme_bw()+ | |
219 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
220 theme(legend.position="bottom",legend.direction="vertical")+ | |
221 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
222 coord_labels = aggregate(cbind(x,y)~y_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
223 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$y_vector) | |
224 print(y_plot) | |
225 | |
226 | |
227 ######################## PLS ############################# | |
228 #if str( $type_cond.method_cond.class_method) == "PLS": | |
229 print("PLS") | |
230 | |
231 ######################## PLS - CV ############################# | |
232 #if str( $type_cond.method_cond.analysis_cond.PLS_method) == "cvapply": | |
233 print("PLS cv") | |
234 | |
235 ## folds | |
236 #if str($type_cond.method_cond.analysis_cond.fold_cond.fold_vector) == "fold_internal": | |
237 | |
238 fold_vector = msidata\$$type_cond.method_cond.analysis_cond.fold_cond.fold_name | |
239 #elif str($type_cond.method_cond.analysis_cond.fold_cond.fold_vector) == "fold_external": | |
240 fold_tabular = read.delim("$type_cond.method_cond.analysis_cond.fold_cond.fold_data", header = FALSE, stringsAsFactors = FALSE) | |
241 fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.analysis_cond.fold_cond.fold_column]) | |
242 number_pixels = length(fold_vector) ## should be same as in data | |
243 #end if | |
244 | |
245 ## plot of folds | |
246 | |
247 position_df = cbind(coord(msidata)[,1:2], fold_vector) | |
248 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ | |
249 geom_tile() + | |
250 coord_fixed()+ | |
251 ggtitle("Distribution of the fold variable")+ | |
252 theme_bw()+ | |
253 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
254 theme(legend.position="bottom",legend.direction="vertical")+ | |
255 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
256 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
257 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) | |
258 print(fold_plot) | |
259 | |
260 ## number of components | |
261 components = c($type_cond.method_cond.analysis_cond.plscv_comp) | |
262 | |
263 ## PLS-cvApply: | |
264 msidata.cv.pls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components) | |
265 | |
266 ## create table with summary | |
267 count = 1 | |
268 summary_plscv = list() | |
269 accuracy_vector = numeric() | |
270 for (iteration in components){ | |
271 | |
272 summary_iteration = summary(msidata.cv.pls)\$accuracy[[paste0("ncomp = ", iteration)]] | |
273 summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table | |
274 accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot | |
275 empty_row = c(paste0("ncomp = ", iteration), rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration | |
276 ##rownames(labeled_iteration)[1] = paste0("ncomp = ", iteration) | |
277 ##labeled_iteration = cbind(rownames(labeled_iteration), labeled_iteration) | |
278 labeled_iteration = rbind(empty_row, summary_iteration) | |
279 | |
280 summary_plscv[[count]] = labeled_iteration | |
281 count = count+1} ## create list with summary table for each component | |
282 ## create dataframe from list | |
283 summary_plscv = do.call(rbind, summary_plscv) | |
284 summary_df = as.data.frame(summary_plscv) | |
285 rownames(summary_df) = NULL | |
286 | |
287 ## plots | |
288 ## plot to find ncomp with highest accuracy | |
289 plot(summary(msidata.cv.pls), main="Accuracy of PLS classification") | |
290 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy | |
291 ## one image for each sample/fold, 4 images per page | |
292 image(msidata.cv.pls, model = list(ncomp = ncomp_max), layout = c(2, 2)) | |
293 | |
294 par(opar) | |
295 ## print table with summary in pdf | |
296 plot(0,type='n',axes=FALSE,ann=FALSE) | |
297 title(main="Summary for the different components\n", adj=0.5) | |
298 ## summary for 4 components (20 rows) fits in one page: | |
299 if (length(components)<5){ | |
300 grid.table(summary_df, rows= NULL) | |
301 }else{ | |
302 grid.table(summary_df[1:20,], rows= NULL) | |
303 mincount = 21 | |
304 maxcount = 40 | |
305 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ | |
306 plot(0,type='n',axes=FALSE,ann=FALSE) | |
307 if (maxcount <= nrow(summary_df)){ | |
308 grid.table(summary_df[mincount:maxcount,], rows= NULL) | |
309 mincount = mincount+20 | |
310 maxcount = maxcount+20 | |
311 }else{### stop last page with last sample otherwise NA in table | |
312 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | |
313 } | |
314 } | |
315 | |
316 ## optional output as .RData | |
317 #if $output_rdata: | |
318 save(msidata.cv.pls, file="$classification_rdata") | |
319 #end if | |
320 ######################## PLS - analysis ########################### | |
321 #elif str( $type_cond.method_cond.analysis_cond.PLS_method) == "PLS_analysis": | |
322 print("PLS analysis") | |
323 | |
324 ## number of components | |
325 component = c($type_cond.method_cond.analysis_cond.pls_comp) | |
326 | |
327 ### pls analysis | |
328 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale) | |
329 | |
330 ### plot of PLS coefficients | |
331 plot(msidata.pls, main="PLS coefficients per m/z") | |
332 | |
333 ### summary table of PLS | |
334 summary_table = summary(msidata.pls)\$accuracy[[paste0("ncomp = ",component)]] | |
335 summary_table = cbind(rownames(summary_table), data.frame(summary_table)) | |
336 rownames(summary_table) = NULL | |
337 print(summary_table) | |
338 ###plot(0,type='n',axes=FALSE,ann=FALSE) | |
339 ###grid.table(test, rows= TRUE) | |
340 | |
341 ### image of the best m/z | |
342 print(image(msidata, mz = topLabels(msidata.pls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap")) | |
343 | |
344 ## m/z and pixel information output | |
345 pls_classes = data.frame(msidata.pls\$classes[[1]]) | |
346 rownames(pls_classes) = names(pixels(msidata)) | |
347 colnames(pls_classes) = "predicted diagnosis" | |
348 pls_toplabels = topLabels(msidata.pls, n=$type_cond.method_cond.analysis_cond.pls_toplabels) | |
349 | |
350 write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
351 write.table(pls_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
352 | |
353 ## optional output as .RData | |
354 #if $output_rdata: | |
355 save(msidata.pls, file="$classification_rdata") | |
356 #end if | |
357 | |
358 #end if | |
359 | |
360 | |
361 ######################## OPLS ############################# | |
362 #elif str( $type_cond.method_cond.class_method) == "OPLS": | |
363 print("OPLS") | |
364 | |
365 ######################## OPLS -CV ############################# | |
366 #if str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_cvapply": | |
367 print("OPLS cv") | |
368 | |
369 ## folds | |
370 #if str($type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_vector) == "opls_fold_internal": | |
371 fold_vector = msidata\$$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_name | |
372 #elif str($type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_vector) == "opls_fold_external": | |
373 fold_tabular = read.delim("$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_data", header = FALSE, stringsAsFactors = FALSE) | |
374 fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_column]) | |
375 number_pixels = length(fold_vector) ## should be same as in data | |
376 #end if | |
377 | |
378 ## plot of folds | |
379 | |
380 position_df = cbind(coord(msidata)[,1:2], fold_vector) | |
381 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ | |
382 geom_tile() + | |
383 coord_fixed()+ | |
384 ggtitle("Distribution of the fold variable")+ | |
385 theme_bw()+ | |
386 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
387 theme(legend.position="bottom",legend.direction="vertical")+ | |
388 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
389 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
390 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) | |
391 print(fold_plot) | |
392 | |
393 ## number of components | |
394 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp) | |
395 | |
396 ## OPLS-cvApply: | |
397 msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv) | |
398 | |
399 ## create table with summary | |
400 count = 1 | |
401 summary_oplscv = list() | |
402 accuracy_vector = numeric() | |
403 for (iteration in components){ | |
404 summary_iteration = summary(msidata.cv.opls)\$accuracy[[paste0("ncomp = ", iteration)]] | |
405 summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table | |
406 accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot | |
407 empty_row = c(paste0("ncomp = ", iteration), rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration | |
408 ##rownames(labeled_iteration)[1] = paste0("ncomp = ", iteration) | |
409 ##labeled_iteration = cbind(rownames(labeled_iteration), labeled_iteration) | |
410 labeled_iteration = rbind(empty_row, summary_iteration) | |
411 summary_oplscv[[count]] = labeled_iteration ## create list with summary table for each component | |
412 count = count+1} | |
413 ## create dataframe from list | |
414 summary_oplscv = do.call(rbind, summary_oplscv) | |
415 summary_df = as.data.frame(summary_oplscv) | |
416 rownames(summary_df) = NULL | |
417 | |
418 ## plots | |
419 ## plot to find ncomp with highest accuracy | |
420 plot(summary(msidata.cv.opls), main="Accuracy of OPLS classification") | |
421 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy | |
422 ## one image for each sample/fold, 4 images per page | |
423 image(msidata.cv.opls, model = list(ncomp = ncomp_max), layout = c(2, 2)) | |
424 | |
425 par(opar) | |
426 ## print table with summary in pdf | |
427 plot(0,type='n',axes=FALSE,ann=FALSE) | |
428 title(main="Summary for the different components\n", adj=0.5) | |
429 ## summary for 4 components (20 rows) fits in one page: | |
430 if (length(components)<5){ | |
431 grid.table(summary_df, rows= NULL) | |
432 }else{ | |
433 grid.table(summary_df[1:20,], rows= NULL) | |
434 mincount = 21 | |
435 maxcount = 40 | |
436 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ | |
437 plot(0,type='n',axes=FALSE,ann=FALSE) | |
438 if (maxcount <= nrow(summary_df)){ | |
439 grid.table(summary_df[mincount:maxcount,], rows= NULL) | |
440 mincount = mincount+20 | |
441 maxcount = maxcount+20 | |
442 }else{### stop last page with last sample otherwise NA in table | |
443 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | |
444 } | |
445 } | |
446 | |
447 ## optional output as .RData | |
448 #if $output_rdata: | |
449 save(msidata.cv.opls, file="$classification_rdata") | |
450 #end if | |
451 | |
452 ######################## OPLS -analysis ########################### | |
453 #elif str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_analysis": | |
454 print("OPLS analysis") | |
455 | |
456 ## number of components | |
457 component = c($type_cond.method_cond.opls_analysis_cond.opls_comp) | |
458 | |
459 ### opls analysis | |
460 msidata.opls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew) | |
461 | |
462 ### plot of OPLS coefficients | |
463 plot(msidata.opls, main="OPLS coefficients per m/z") | |
464 | |
465 ### summary table of OPLS | |
466 summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]] | |
467 summary_table = cbind(rownames(summary_table), summary_table) | |
468 rownames(summary_table) = NULL | |
469 summary_table = data.frame(summary_table) | |
470 print(summary_table) | |
471 ###plot(0,type='n',axes=FALSE,ann=FALSE) | |
472 ###grid.table(test, rows= TRUE) | |
473 | |
474 ### image of the best m/z | |
475 print(image(msidata, mz = topLabels(msidata.opls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap")) | |
476 | |
477 ## m/z and pixel information output | |
478 opls_classes = data.frame(msidata.opls\$classes[[1]]) | |
479 rownames(opls_classes) = names(pixels(msidata)) | |
480 colnames(opls_classes) = "predicted diagnosis" | |
481 opls_toplabels = topLabels(msidata.opls, n=$type_cond.method_cond.opls_analysis_cond.opls_toplabels) | |
482 | |
483 write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
484 write.table(opls_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
485 | |
486 ## optional output as .RData | |
487 #if $output_rdata: | |
488 save(msidata.opls, file="$classification_rdata") | |
489 #end if | |
490 | |
491 #end if | |
492 | |
493 ######################## SSC ############################# | |
494 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids": | |
495 print("SSC") | |
496 | |
497 ######################## SSC - CV ############################# | |
498 #if str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_cvapply": | |
499 print("SSC cv") | |
500 | |
501 ## folds | |
502 #if str($type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_vector) == "ssc_fold_internal": | |
503 fold_vector = msidata\$$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_name | |
504 | |
505 #elif str($type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_vector) == "ssc_fold_external": | |
506 fold_tabular = read.delim("$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_data", header = FALSE, stringsAsFactors = FALSE) | |
507 fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_column]) | |
508 number_pixels = length(fold_vector) ## should be same as in data | |
509 #end if | |
510 | |
511 ## plot of folds | |
512 position_df = cbind(coord(msidata)[,1:2], fold_vector) | |
513 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ | |
514 geom_tile() + | |
515 coord_fixed()+ | |
516 ggtitle("Distribution of the fold variable")+ | |
517 theme_bw()+ | |
518 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
519 theme(legend.position="bottom",legend.direction="vertical")+ | |
520 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
521 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
522 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) | |
523 print(fold_plot) | |
524 | |
525 ## SSC-cvApply: | |
526 msidata.cv.ssc <- cvApply(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") | |
527 | |
528 ## create table with summary | |
529 count = 1 | |
530 summary_ssccv = list() | |
531 accuracy_vector = numeric() | |
532 | |
533 for (iteration in names(msidata.cv.ssc@resultData[[1]][,1])){ | |
534 summary_iteration = summary(msidata.cv.ssc)\$accuracy[[iteration]] | |
535 summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table | |
536 accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot | |
537 empty_row = c(iteration, rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration | |
538 labeled_iteration = rbind(empty_row, summary_iteration) | |
539 summary_ssccv[[count]] = labeled_iteration ## create list with summary table for each component | |
540 count = count+1 | |
541 } | |
542 | |
543 ##create dataframe from list | |
544 summary_ssccv = do.call(rbind, summary_ssccv) | |
545 summary_df = as.data.frame(summary_ssccv) | |
546 rownames(summary_df) = NULL | |
547 | |
548 ## plot to find parameters with highest accuracy | |
549 plot(summary(msidata.cv.ssc), main="Accuracy of SSC classification") | |
550 best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy | |
551 r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4)) | |
552 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space | |
553 | |
554 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), layout=c(2,2)) | |
555 | |
556 par(opar) | |
557 ## print table with summary in pdf | |
558 plot(0,type='n',axes=FALSE,ann=FALSE) | |
559 title(main="Summary for the different parameters\n", adj=0.5) | |
560 ## summary for 4 parameters (20 rows) fits in one page: | |
561 if (length(names(msidata.cv.ssc@resultData[[1]][,1]))<5){ | |
562 grid.table(summary_df, rows= NULL) | |
563 }else{ | |
564 grid.table(summary_df[1:20,], rows= NULL) | |
565 mincount = 21 | |
566 maxcount = 40 | |
567 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ | |
568 plot(0,type='n',axes=FALSE,ann=FALSE) | |
569 if (maxcount <= nrow(summary_df)){ | |
570 grid.table(summary_df[mincount:maxcount,], rows= NULL) | |
571 mincount = mincount+20 | |
572 maxcount = maxcount+20 | |
573 }else{### stop last page with last sample otherwise NA in table | |
574 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | |
575 } | |
576 } | |
577 | |
578 ## optional output as .RData | |
579 #if $output_rdata: | |
580 save(msidata.cv.opls, file="$classification_rdata") | |
581 #end if | |
582 | |
583 ######################## SSC -analysis ########################### | |
584 #elif str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_analysis": | |
585 print("SSC analysis") | |
586 | |
587 ## SSC analysis | |
588 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, .fold = fold_vector, | |
589 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") | |
590 | |
591 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s))) | |
592 | |
593 ### summary table SSC | |
594 | |
595 ##summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]] | |
596 summary_table = summary(msidata.ssc) | |
597 print(summary_table) | |
598 ##summary_table = cbind(rownames(summary_table), summary_table) | |
599 ##rownames(summary_table) = NULL | |
600 | |
601 ###plot(0,type='n',axes=FALSE,ann=FALSE) | |
602 ###grid.table(summary_table, rows= TRUE) | |
603 | |
604 ### image of the best m/z | |
605 print(image(msidata, mz = topLabels(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap")) | |
606 | |
607 ## m/z and pixel information output | |
608 ssc_classes = data.frame(msidata.ssc\$classes[[1]]) | |
609 rownames(ssc_classes) = names(pixels(msidata)) | |
610 colnames(ssc_classes) = "predicted diagnosis" | |
611 ssc_toplabels = topLabels(msidata.ssc) | |
612 | |
613 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
614 write.table(ssc_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
615 | |
616 ## optional output as .RData | |
617 #if $output_rdata: | |
618 save(msidata.ssc, file="$classification_rdata") | |
619 #end if | |
620 | |
621 #end if | |
622 #end if | |
623 | |
624 | |
625 ######################## II) Prediction ############################# | |
626 ############################################################################# | |
627 #elif str( $type_cond.type_method) == "prediction": | |
628 print("prediction") | |
629 | |
630 #if str($type_cond.new_y.new_y_values) == "no_new_y": | |
631 new_y_vector = FALSE | |
632 #elif str($type_cond.new_y.new_y_values) == "new_y_internal": | |
633 new_y_vector = msidata\$$type_cond.new_y.new_y_name | |
634 #elif str($type_cond.new_y.new_y_values) == "new_y_external": | |
635 | |
636 new_y_tabular = read.delim("$type_cond.new_y.new_y_data", header = FALSE, stringsAsFactors = FALSE) | |
637 new_y_vector = new_y_tabular[,$type_cond.new_y.new_y_column] | |
638 number_pixels = length(new_y_vector) ## should be same as in data | |
639 #end if | |
640 | |
641 training_data = loadRData("$type_cond.training_result") | |
642 prediction = predict(training_data,msidata, newy = new_y_vector) | |
643 | |
644 ## optional output as .RData | |
645 #if $output_rdata: | |
646 msidata = prediction | |
647 save(msidata, file="$classification_rdata") | |
648 #end if | |
649 #end if | |
650 | |
651 dev.off() | |
652 }else{ | |
653 print("Inputfile has no intensities > 0") | |
654 dev.off() | |
655 } | |
656 | |
657 ]]></configfile> | |
658 </configfiles> | |
659 <inputs> | |
660 <param name="infile" type="data" format="imzml, rdata, analyze75" | |
661 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" | |
662 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> | |
663 <conditional name="processed_cond"> | |
664 <param name="processed_file" type="select" label="Is the input file a processed imzML file "> | |
665 <option value="no_processed" selected="True">not a processed imzML</option> | |
666 <option value="processed">processed imzML</option> | |
667 </param> | |
668 <when value="no_processed"/> | |
669 <when value="processed"> | |
670 <param name="accuracy" type="float" value="50" label="Mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/> | |
671 <param name="units" display="radio" type="select" label="Unit of the mass accuracy" help="either m/z or ppm"> | |
672 <option value="mz" >mz</option> | |
673 <option value="ppm" selected="True" >ppm</option> | |
674 </param> | |
675 </when> | |
676 </conditional> | |
677 | |
678 <conditional name="type_cond"> | |
679 <param name="type_method" type="select" label="Analysis step to perform"> | |
680 <option value="training" selected="True">training</option> | |
681 <option value="prediction">prediction</option> | |
682 </param> | |
683 <when value="training"> | |
684 | |
685 <conditional name="method_cond"> | |
686 <param name="class_method" type="select" label="Select the method for classification"> | |
687 <option value="PLS" selected="True">PLS</option> | |
688 <option value="OPLS">OPLS</option> | |
689 <option value="spatialShrunkenCentroids">spatial shrunken centroids</option> | |
690 </param> | |
691 <when value="PLS"> | |
692 | |
693 <conditional name="analysis_cond"> | |
694 <param name="PLS_method" type="select" label="Crossvalidation or analysis"> | |
695 <option value="cvapply" selected="True">cvApply</option> | |
696 <option value="PLS_analysis">PLS analysis</option> | |
697 </param> | |
698 <when value="cvapply"> | |
699 | |
700 <param name="plscv_comp" type="text" value="1:2" | |
701 label="The number of PLS components" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/> | |
702 <conditional name="fold_cond"> | |
703 <param name="fold_vector" type="select" label="Define the fold variable"> | |
704 <option value="fold_internal" selected="True">dataset contains already fold</option> | |
705 <option value="fold_external">use fold from tabular file</option> | |
706 </param> | |
707 <when value="fold_internal"> | |
708 <param name="fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/> | |
709 </when> | |
710 <when value="fold_external"> | |
711 <param name="fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/> | |
712 <param name="fold_column" data_ref="fold_data" label="Column with folds" type="data_column"/> | |
713 </when> | |
714 </conditional> | |
715 </when> | |
716 | |
717 <when value="PLS_analysis"> | |
718 <param name="pls_comp" type="integer" value="5" | |
719 label="The optimal number of PLS components as indicated by cross-validations" help="Run cvApply first to optain optiaml number of PLS components"/> | |
720 <param name="pls_scale" type="boolean" display="radio" label="data scaling" truevalue="TRUE" falsevalue="FALSE"/> | |
721 <param name="pls_toplabels" type="integer" value="100" | |
722 label="Number of toplabels (masses) which should be written in tabular output"/> | |
723 </when> | |
724 </conditional> | |
725 </when> | |
726 | |
727 <when value="OPLS"> | |
728 | |
729 <conditional name="opls_analysis_cond"> | |
730 <param name="opls_method" type="select" label="Analysis step to perform"> | |
731 <option value="opls_cvapply" selected="True">cvApply</option> | |
732 <option value="opls_analysis">OPLS analysis</option> | |
733 </param> | |
734 | |
735 <when value="opls_cvapply"> | |
736 <param name="opls_cvcomp" type="text" value="1:2" | |
737 label="The number of OPLS components" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/> | |
738 <param name="xnew_cv" type="boolean" display="radio" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/> | |
739 <conditional name="opls_fold_cond"> | |
740 <param name="opls_fold_vector" type="select" label="Define the fold variable"> | |
741 <option value="opls_fold_internal" selected="True">dataset contains already fold</option> | |
742 <option value="opls_fold_external">use fold from tabular file</option> | |
743 </param> | |
744 <when value="opls_fold_internal"> | |
745 <param name="opls_fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/> | |
746 </when> | |
747 <when value="opls_fold_external"> | |
748 <param name="opls_fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/> | |
749 <param name="opls_fold_column" data_ref="opls_fold_data" label="Column with folds" type="data_column"/> | |
750 </when> | |
751 </conditional> | |
752 </when> | |
753 | |
754 <when value="opls_analysis"> | |
755 <param name="opls_comp" type="integer" value="5" | |
756 label="The optimal number of PLS components as indicated by cross-validations" help="Run cvApply first to optain optiaml number of PLS components"/> | |
757 <param name="xnew" type="boolean" display="radio" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/> | |
758 <param name="opls_scale" type="select" label="data scaling" display="radio" optional="False"> | |
759 <option value="TRUE">yes</option> | |
760 <option value="FALSE" selected="True">no</option> | |
761 </param> | |
762 <param name="opls_toplabels" type="integer" value="100" | |
763 label="Number of toplabels (features) which should be written in tabular output"/> | |
764 </when> | |
765 </conditional> | |
766 </when> | |
767 | |
768 <when value="spatialShrunkenCentroids"> | |
769 <conditional name="ssc_analysis_cond"> | |
770 <param name="ssc_method" type="select" label="Analysis step to perform"> | |
771 <option value="ssc_cvapply" selected="True">cvApply</option> | |
772 <option value="ssc_analysis">spatial shrunken centroids analysis</option> | |
773 </param> | |
774 <when value="ssc_cvapply"> | |
775 | |
776 <conditional name="ssc_fold_cond"> | |
777 <param name="ssc_fold_vector" type="select" label="Define the fold variable"> | |
778 <option value="ssc_fold_internal" selected="True">dataset contains already fold</option> | |
779 <option value="ssc_fold_external">use fold from tabular file</option> | |
780 </param> | |
781 <when value="ssc_fold_internal"> | |
782 <param name="ssc_fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/> | |
783 </when> | |
784 <when value="ssc_fold_external"> | |
785 <param name="ssc_fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/> | |
786 <param name="ssc_fold_column" data_ref="ssc_fold_data" label="Column with folds" type="data_column"/> | |
787 </when> | |
788 </conditional> | |
789 </when> | |
790 | |
791 <when value="ssc_analysis"> | |
792 | |
793 <param name="ssc_toplabels" type="integer" value="100" | |
794 label="Number of toplabels (features) which should be written in tabular output"/> | |
795 </when> | |
796 </conditional> | |
797 <param name="ssc_r" type="text" value="2" | |
798 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"/> | |
799 <param name="ssc_s" type="text" value="2" | |
800 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"/> | |
801 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> | |
802 <option value="gaussian">gaussian</option> | |
803 <option value="adaptive" selected="True">adaptive</option> | |
804 </param> | |
805 | |
806 </when> | |
807 </conditional> | |
808 <conditional name="y_cond"> | |
809 <param name="y_vector" type="select" label="Define the response variable y"> | |
810 <option value="y_internal" selected="True">dataset contains already y</option> | |
811 <option value="y_external">use y from tabular file</option> | |
812 </param> | |
813 <when value="y_internal"> | |
814 <param name="y_name" type="text" value="combined_sample" label="Name of the pData slot where y is stored" help="Outputs of MSI_combine tool have 'combined_sample' as name"/> | |
815 </when> | |
816 <when value="y_external"> | |
817 <param name="y_data" type="data" format="tabular" label="Tabular file with column for y response"/> | |
818 <param name="y_column" data_ref="y_data" label="Column with y response" type="data_column"/> | |
819 </when> | |
820 </conditional> | |
821 </when> | |
822 | |
823 <when value="prediction"> | |
824 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> | |
825 <conditional name="new_y"> | |
826 <param name="new_y_values" type="select" label="Define the new response y"> | |
827 <option value="no_new_y" >no new y response</option> | |
828 <option value="new_y_internal" selected="True">dataset contains already y</option> | |
829 <option value="new_y_external">use y from tabular file</option> | |
830 </param> | |
831 <when value="no_new_y"/> | |
832 <when value="new_y_internal"> | |
833 <param name="new_y_name" type="text" value="combined_sample" label="Name of the pData slot where y is stored" help="data merged with MSI_combine tool has 'combined_sample' as name"/> | |
834 </when> | |
835 | |
836 <when value="new_y_external"> | |
837 <param name="new_y_data" type="data" format="tabular" label="Tabular file with column for y response"/> | |
838 <param name="new_y_column" data_ref="new_y_data" label="Column with y response" type="data_column"/> | |
839 </when> | |
840 </conditional> | |
841 </when> | |
842 </conditional> | |
843 <param name="output_rdata" type="boolean" display="radio" label="Results as .RData output"/> | |
844 </inputs> | |
845 <outputs> | |
846 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "$infile.display_name classification"/> | |
847 <data format="tabular" name="mzfeatures" label="$infile.display_name features"/> | |
848 <data format="tabular" name="pixeloutput" label="$infile.display_name pixels"/> | |
849 <data format="rdata" name="classification_rdata" label="$infile.display_name classification"> | |
850 <filter>output_rdata</filter> | |
851 </data> | |
852 </outputs> | |
853 <tests> | |
854 <test expect_num_outputs="3"> | |
855 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
856 <conditional name="type_cond"> | |
857 <param name="type_method" value="training"/> | |
858 <conditional name="method_cond"> | |
859 <param name="class_method" value="PLS"/> | |
860 <conditional name="analysis_cond"> | |
861 <param name="PLS_method" value="cvapply"/> | |
862 | |
863 <param name="plscv_comp" value="2:4"/> | |
864 <conditional name="fold_cond"> | |
865 <param name="fold_vector" value="fold_external"/> | |
866 <param name="fold_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
867 <param name="fold_column" value="1"/> | |
868 </conditional> | |
869 | |
870 </conditional> | |
871 </conditional> | |
872 <conditional name="y_cond"> | |
873 <param name="y_vector" value="y_external"/> | |
874 <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
875 <param name="y_column" value="2"/> | |
876 </conditional> | |
877 </conditional> | |
878 <output name="mzfeatures" file="features_test1.tabular"/> | |
879 <output name="pixeloutput" file="pixels_test1.tabular"/> | |
880 <output name="classification_images" file="test1.pdf" compare="sim_size" delta="20000"/> | |
881 </test> | |
882 | |
883 <test expect_num_outputs="4"> | |
884 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
885 <conditional name="type_cond"> | |
886 <param name="type_method" value="training"/> | |
887 <conditional name="method_cond"> | |
888 <param name="class_method" value="PLS"/> | |
889 <conditional name="analysis_cond"> | |
890 <param name="PLS_method" value="PLS_analysis"/> | |
891 | |
892 <param name="pls_comp" value="2"/> | |
893 <param name="pls_scale" value="TRUE"/> | |
894 <param name="pls_toplabels" value="100"/> | |
895 <conditional name="fold_cond"> | |
896 <param name="fold_vector" value="fold_external"/> | |
897 <param name="fold_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
898 <param name="fold_column" value="1"/> | |
899 </conditional> | |
900 | |
901 </conditional> | |
902 </conditional> | |
903 <conditional name="y_cond"> | |
904 <param name="y_vector" value="y_external"/> | |
905 <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
906 <param name="y_column" value="2"/> | |
907 </conditional> | |
908 </conditional> | |
909 <param name="output_rdata" value="True"/> | |
910 <output name="mzfeatures" file="features_test2.tabular"/> | |
911 <output name="pixeloutput" file="pixels_test2.tabular"/> | |
912 <output name="classification_images" file="test2.pdf" compare="sim_size" delta="20000"/> | |
913 <output name="classification_rdata" file="test2.rdata" compare="sim_size" /> | |
914 </test> | |
915 | |
916 <test expect_num_outputs="3"> | |
917 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
918 <conditional name="type_cond"> | |
919 <param name="type_method" value="training"/> | |
920 <conditional name="method_cond"> | |
921 <param name="class_method" value="OPLS"/> | |
922 <conditional name="opls_analysis_cond"> | |
923 <param name="opls_method" value="opls_analysis"/> | |
924 | |
925 <param name="opls_cvcomp" value="1:2"/> | |
926 <param name="xnew_cv" value="FALSE"/> | |
927 <conditional name="opls_fold_cond"> | |
928 <param name="opls_fold_vector" value="opls_fold_external"/> | |
929 <param name="opls_fold_data" ftype="tabular" value="random_factors.tabular"/> | |
930 <param name="opls_fold_column" value="1"/> | |
931 </conditional> | |
932 </conditional> | |
933 </conditional> | |
934 <conditional name="y_cond"> | |
935 <param name="y_vector" value="y_external"/> | |
936 <param name="y_data" value="random_factors.tabular" ftype="tabular"/> | |
937 <param name="y_column" value="2"/> | |
938 </conditional> | |
939 </conditional> | |
940 <output name="mzfeatures" file="features_test3.tabular"/> | |
941 <output name="pixeloutput" file="pixels_test3.tabular"/> | |
942 <output name="classification_images" file="test3.pdf" compare="sim_size" delta="20000"/> | |
943 </test> | |
944 | |
945 <test expect_num_outputs="4"> | |
946 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
947 <conditional name="type_cond"> | |
948 <param name="type_method" value="training"/> | |
949 <conditional name="method_cond"> | |
950 <param name="class_method" value="OPLS"/> | |
951 <conditional name="opls_analysis_cond"> | |
952 | |
953 <param name="opls_method" value="opls_analysis"/> | |
954 <param name="opls_comp" value="3"/> | |
955 <param name="xnew" value="FALSE"/> | |
956 <param name="opls_scale" value="FALSE"/> | |
957 <param name="opls_toplabels" value="100"/> | |
958 </conditional> | |
959 | |
960 </conditional> | |
961 <conditional name="y_cond"> | |
962 <param name="y_vector" value="y_external"/> | |
963 <param name="y_data" value="random_factors.tabular" ftype="tabular"/> | |
964 <param name="y_column" value="2"/> | |
965 </conditional> | |
966 </conditional> | |
967 <param name="output_rdata" value="True"/> | |
968 <output name="mzfeatures" file="features_test4.tabular"/> | |
969 <output name="pixeloutput" file="pixels_test4.tabular"/> | |
970 <output name="classification_images" file="test4.pdf" compare="sim_size" delta="20000"/> | |
971 <output name="classification_rdata" file="test4.rdata" compare="sim_size" /> | |
972 </test> | |
973 | |
974 <test expect_num_outputs="3"> | |
975 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
976 <conditional name="type_cond"> | |
977 <param name="type_method" value="training"/> | |
978 <conditional name="method_cond"> | |
979 <param name="class_method" value="spatialShrunkenCentroids"/> | |
980 <conditional name="ssc_analysis_cond"> | |
981 <param name="ssc_method" value="ssc_cvapply"/> | |
982 <conditional name="ssc_fold_cond"> | |
983 <param name="ssc_fold_vector" value="ssc_fold_external"/> | |
984 <param name="ssc_fold_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
985 <param name="ssc_fold_column" value="1"/> | |
986 </conditional> | |
987 <param name="ssc_r" value="1:2"/> | |
988 <param name="ssc_s" value="2:3"/> | |
989 <param name="ssc_kernel_method" value="adaptive"/> | |
990 </conditional> | |
991 </conditional> | |
992 <conditional name="y_cond"> | |
993 <param name="y_vector" value="y_external"/> | |
994 <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
995 <param name="y_column" value="2"/> | |
996 </conditional> | |
997 </conditional> | |
998 <output name="mzfeatures" file="features_test5.tabular"/> | |
999 <output name="pixeloutput" file="pixels_test5.tabular"/> | |
1000 <output name="classification_images" file="test5.pdf" compare="sim_size" delta="20000"/> | |
1001 </test> | |
1002 | |
1003 <test expect_num_outputs="4"> | |
1004 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
1005 <conditional name="type_cond"> | |
1006 <param name="type_method" value="training"/> | |
1007 <conditional name="method_cond"> | |
1008 <param name="class_method" value="spatialShrunkenCentroids"/> | |
1009 <conditional name="ssc_analysis_cond"> | |
1010 <param name="ssc_method" value="ssc_analysis"/> | |
1011 <param name="ssc_toplabels" value="100"/> | |
1012 </conditional> | |
1013 <param name="ssc_r" value="2"/> | |
1014 <param name="ssc_s" value="2"/> | |
1015 <param name="ssc_kernel_method" value="adaptive"/> | |
1016 </conditional> | |
1017 <conditional name="y_cond"> | |
1018 <param name="y_vector" value="y_external"/> | |
1019 <param name="y_data" value="random_factors.tabular" ftype="tabular"/> | |
1020 <param name="y_column" value="2"/> | |
1021 </conditional> | |
1022 </conditional> | |
1023 <param name="output_rdata" value="True"/> | |
1024 <output name="mzfeatures" file="features_test6.tabular"/> | |
1025 <output name="pixeloutput" file="pixels_test6.tabular"/> | |
1026 <output name="classification_images" file="test6.pdf" compare="sim_size" delta="20000"/> | |
1027 <output name="classification_rdata" file="test6.rdata" compare="sim_size" /> | |
1028 </test> | |
1029 | |
1030 <test expect_num_outputs="4"> | |
1031 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
1032 <conditional name="type_cond"> | |
1033 <param name="type_method" value="prediction"/> | |
1034 <param name="training_result" value="test2.rdata" ftype="rdata"/> | |
1035 <conditional name="new_y"> | |
1036 <param name="new_y_values" value="new_y_external"/> | |
1037 <param name="new_y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
1038 <param name="new_y_column" value="2"/> | |
1039 </conditional> | |
1040 </conditional> | |
1041 <param name="output_rdata" value="True"/> | |
1042 <output name="mzfeatures" file="features_test7.tabular"/> | |
1043 <output name="pixeloutput" file="pixels_test7.tabular"/> | |
1044 <output name="classification_images" file="test7.pdf" compare="sim_size" delta="20000"/> | |
1045 <output name="classification_rdata" file="test7.rdata" compare="sim_size" /> | |
1046 </test> | |
1047 | |
1048 </tests> | |
1049 <help> | |
1050 <![CDATA[ | |
1051 | |
1052 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ | |
1053 | |
1054 This tool provides three different Cardinal functions for supervised classification of mass-spectrometry imaging data. | |
1055 | |
1056 Input data: 3 types of input data can be used: | |
1057 | |
1058 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ | |
1059 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) | |
1060 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) | |
1061 | |
1062 Options: | |
1063 | |
1064 - PLS(-DA): partial least square (discriminant analysis) | |
1065 - O-PLS(-DA): Orthogonal partial least squares (discriminant analysis) | |
1066 - Spatial shrunken centroids | |
1067 | |
1068 Output: | |
1069 | |
1070 - Pdf with the heatmaps and plots for the classification | |
1071 - Tabular file with information on masses and pixels: toplabels/classes (PLS, spatial shrunken centroids) | |
1072 - optional RData output to further explore the results with Cardinal in R | |
1073 | |
1074 ]]> | |
1075 </help> | |
1076 <citations> | |
1077 <citation type="doi">10.1093/bioinformatics/btv146</citation> | |
1078 </citations> | |
1079 </tool> |