diff msi_classification.xml @ 0:f0b415eb3bcf draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_classification commit 8087490eb4dcaf4ead0f03eae4126780d21e5503
author galaxyp
date Fri, 06 Jul 2018 14:12:51 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/msi_classification.xml	Fri Jul 06 14:12:51 2018 -0400
@@ -0,0 +1,1079 @@
+<tool id="mass_spectrometry_imaging_classification" name="MSI classification" version="1.10.0.0">
+    <description>spatial classification of mass spectrometry imaging data</description>
+    <requirements>
+        <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
+        <requirement type="package" version="2.2.1">r-gridextra</requirement>
+        <requirement type="package" version="0.20-35">r-lattice</requirement>
+        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+    <![CDATA[
+
+        ## Link the input dataset under a fixed local name so the R script can read it,
+        ## then print the generated R script (content and path) to the log and run it.
+        ## All dataset paths are single quoted so that unusual characters in paths do not break the shell command.
+        #if $infile.ext == 'imzml'
+            ln -s '${infile.extra_files_path}/imzml' infile.imzML &&
+            ln -s '${infile.extra_files_path}/ibd' infile.ibd &&
+        #elif $infile.ext == 'analyze75'
+            ln -s '${infile.extra_files_path}/hdr' infile.hdr &&
+            ln -s '${infile.extra_files_path}/img' infile.img &&
+            ln -s '${infile.extra_files_path}/t2m' infile.t2m &&
+        #else
+            ln -s '$infile' infile.RData &&
+        #end if
+        cat '${MSI_segmentation}' &&
+        echo '${MSI_segmentation}' &&
+        Rscript '${MSI_segmentation}'
+
+    ]]>
+    </command>
+    <configfiles>
+        <configfile name="MSI_segmentation"><![CDATA[
+
+
+################################# load libraries and read file #########################
+
+library(Cardinal)
+library(gridExtra)
+library(lattice)
+library(ggplot2)
+
+
+#if $infile.ext == 'imzml'
+    #if str($processed_cond.processed_file) == "processed":
+        msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units")
+    #else
+        msidata <- readImzML('infile')
+    #end if
+#elif $infile.ext == 'analyze75'
+    msidata = readAnalyze('infile')
+#else
+    load('infile.RData')
+#end if
+
+## function to later read RData reference files in
+
+loadRData <- function(fileName){
+    ## Loads an RData file and returns the object stored in it.
+    ## fileName: path of the RData file.
+    ## The file is loaded into a private environment, so the lookup cannot be
+    ## confused by the function argument or by an object that is itself called "fileName".
+    load_env <- new.env()
+    object_names <- load(fileName, envir = load_env)
+    if (length(object_names) == 0){
+        stop("RData file contains no objects: ", fileName)
+    }
+    ## alphabetical order mirrors the ordering of the former ls() based lookup
+    object_names <- sort(object_names)
+    if (length(object_names) > 1){
+        warning("RData file contains several objects (", paste(object_names, collapse=", "), "), only the first one is used")
+    }
+    get(object_names[1], envir = load_env)
+}
+
+## create full matrix to make processed imzML files compatible with classification
+iData(msidata) <- iData(msidata)[] 
+
+###################################### file properties in numbers ##############
+
+## Intensity matrix, extracted once and reused for all intensity statistics below
+int_matrix = spectra(msidata)[]
+
+## Number of features (mz)
+maxfeatures = length(features(msidata))
+## Range mz
+minmz = round(min(mz(msidata)), digits=2)
+maxmz = round(max(mz(msidata)), digits=2)
+## Number of spectra (pixels)
+pixelcount = length(pixels(msidata))
+## Range x coordinates
+minimumx = min(coord(msidata)[,1])
+maximumx = max(coord(msidata)[,1])
+## Range y coordinates
+minimumy = min(coord(msidata)[,2])
+maximumy = max(coord(msidata)[,2])
+## Range of intensities
+minint = round(min(int_matrix), digits=2)
+maxint = round(max(int_matrix), digits=2)
+medint = round(median(int_matrix), digits=2)
+## Number of intensities > 0
+npeaks= sum(int_matrix>0)
+## Spectra multiplied with mz (potential number of peaks)
+numpeaks = ncol(int_matrix)*nrow(int_matrix)
+## Percentage of intensities > 0
+percpeaks = round(npeaks/numpeaks*100, digits=2)
+## Number of empty TICs (column sums of the intensity matrix that are zero)
+TICs = colSums(int_matrix) 
+NumemptyTIC = sum(TICs == 0)
+
+## the helper matrix is not needed any more
+rm(int_matrix)
+
+
+## Processing information stored in the dataset
+processinginfo = processingData(msidata)
+centroidedinfo = processinginfo@centroided ## TRUE or FALSE
+
+## For every processing step: keep the stored entry, or report 'FALSE' when the slot is empty
+
+## normalization
+normalizationinfo = processinginfo@normalization
+if (length(normalizationinfo) == 0) {
+  normalizationinfo = 'FALSE'
+}
+## smoothing
+smoothinginfo = processinginfo@smoothing
+if (length(smoothinginfo) == 0) {
+  smoothinginfo = 'FALSE'
+}
+## baseline
+baselinereductioninfo = processinginfo@baselineReduction
+if (length(baselinereductioninfo) == 0) {
+  baselinereductioninfo = 'FALSE'
+}
+## peak picking
+peakpickinginfo = processinginfo@peakPicking
+if (length(peakpickinginfo) == 0) {
+  peakpickinginfo = 'FALSE'
+}
+
+#############################################################################
+
+properties = c("Number of mz features",
+               "Range of mz values",
+               "Number of pixels", 
+               "Range of x coordinates", 
+               "Range of y coordinates",
+               "Range of intensities", 
+               "Median of intensities",
+               "Intensities > 0",
+               "Number of empty spectra",
+               "Preprocessing", 
+               "Normalization", 
+               "Smoothing",
+               "Baseline reduction",
+               "Peak picking",
+               "Centroided")
+
+values = c(paste0(maxfeatures), 
+           paste0(minmz, " - ", maxmz), 
+           paste0(pixelcount), 
+           paste0(minimumx, " - ", maximumx),  
+           paste0(minimumy, " - ", maximumy), 
+           paste0(minint, " - ", maxint), 
+           paste0(medint),
+           paste0(percpeaks, " %"), 
+           paste0(NumemptyTIC), 
+           paste0(" "),
+           paste0(normalizationinfo),
+           paste0(smoothinginfo),
+           paste0(baselinereductioninfo),
+           paste0(peakpickinginfo),
+           paste0(centroidedinfo))
+
+property_df = data.frame(properties, values)
+
+
+######################################## PDF ###################################
+################################################################################
+################################################################################
+
+Title = "Prediction"
+
+#if str( $type_cond.type_method) == "training":
+    #if str( $type_cond.method_cond.class_method) == "PLS":
+        Title = "PLS"
+    #elif str( $type_cond.method_cond.class_method) == "OPLS":
+        Title = "OPLS"
+    #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids":
+        Title = "SSC"
+    #end if
+#end if
+
+pdf("classificationpdf.pdf", fonts = "Times", pointsize = 12)
+plot(0,type='n',axes=FALSE,ann=FALSE)
+
+
+title(main=paste0(Title," for file: \n\n", "$infile.display_name"))
+
+
+
+##################### I) numbers and control plots #############################
+###############################################################################
+
+## table with values
+grid.table(property_df, rows= NULL)
+
+if (npeaks > 0){
+
+## remember only the writable graphical parameters; restoring read-only ones via par(opar) raises warnings
+opar <- par(no.readonly = TRUE)
+ 
+    ######################## II) Training #############################
+    #############################################################################
+    #if str( $type_cond.type_method) == "training":
+        print("training")
+
+
+        ## load y response (will be needed in every training scenario)
+
+        #if str($type_cond.y_cond.y_vector) == "y_internal":
+            y_vector = msidata\$$type_cond.y_cond.y_name
+        #elif str($type_cond.y_cond.y_vector) == "y_external":
+            y_tabular = read.delim("$type_cond.y_cond.y_data", header = FALSE, stringsAsFactors = FALSE)
+            y_vector = as.factor(y_tabular[,$type_cond.y_cond.y_column])
+            number_pixels = length(y_vector) ## should be same as in data
+        #end if
+
+    ## plot of y vector
+
+    position_df = cbind(coord(msidata)[,1:2], y_vector)
+    y_plot = ggplot(position_df, aes(x=x, y=y, fill=y_vector))+
+           geom_tile() +
+           coord_fixed()+
+           ggtitle("Distribution of the response variable y")+
+           theme_bw()+
+           theme(text=element_text(family="ArialMT", face="bold", size=15))+
+           theme(legend.position="bottom",legend.direction="vertical")+
+           guides(fill=guide_legend(ncol=4,byrow=TRUE))
+    coord_labels = aggregate(cbind(x,y)~y_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
+    coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$y_vector)
+    print(y_plot)
+
+
+        ######################## PLS #############################
+        #if str( $type_cond.method_cond.class_method) == "PLS":
+            print("PLS")
+
+            ######################## PLS - CV #############################
+            #if str( $type_cond.method_cond.analysis_cond.PLS_method) == "cvapply":
+                print("PLS cv")
+
+                ## folds
+                #if str($type_cond.method_cond.analysis_cond.fold_cond.fold_vector) == "fold_internal":
+
+                    fold_vector = msidata\$$type_cond.method_cond.analysis_cond.fold_cond.fold_name
+                #elif str($type_cond.method_cond.analysis_cond.fold_cond.fold_vector) == "fold_external":
+                    fold_tabular = read.delim("$type_cond.method_cond.analysis_cond.fold_cond.fold_data", header = FALSE, stringsAsFactors = FALSE)
+                    fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.analysis_cond.fold_cond.fold_column])
+                    number_pixels = length(fold_vector) ## should be same as in data
+                #end if
+
+                ## plot of folds
+
+                    position_df = cbind(coord(msidata)[,1:2], fold_vector)
+                    fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+
+                           geom_tile() +
+                           coord_fixed()+
+                           ggtitle("Distribution of the fold variable")+
+                           theme_bw()+
+                           theme(text=element_text(family="ArialMT", face="bold", size=15))+
+                           theme(legend.position="bottom",legend.direction="vertical")+
+                           guides(fill=guide_legend(ncol=4,byrow=TRUE))
+                    coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
+                    coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector)
+                    print(fold_plot)
+
+                ## number of components
+                components = c($type_cond.method_cond.analysis_cond.plscv_comp)
+
+                ## PLS-cvApply:
+                msidata.cv.pls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components)
+
+                ## create table with summary
+                count = 1
+                summary_plscv = list()
+                accuracy_vector = numeric()
+                for (iteration in components){
+
+                    summary_iteration = summary(msidata.cv.pls)\$accuracy[[paste0("ncomp = ", iteration)]]
+                    summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table
+                    accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot
+                    empty_row = c(paste0("ncomp = ", iteration), rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration
+                    ##rownames(labeled_iteration)[1] = paste0("ncomp = ", iteration)
+                    ##labeled_iteration = cbind(rownames(labeled_iteration), labeled_iteration)
+                    labeled_iteration = rbind(empty_row, summary_iteration)
+
+                    summary_plscv[[count]] = labeled_iteration
+                    count = count+1} ## create list with summary table for each component
+                ## create dataframe from list
+                summary_plscv = do.call(rbind, summary_plscv) 
+                summary_df = as.data.frame(summary_plscv)
+                rownames(summary_df) = NULL
+
+                ## plots
+                ## plot to find ncomp with highest accuracy
+                plot(summary(msidata.cv.pls), main="Accuracy of PLS classification")
+                ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy
+                ## one image for each sample/fold, 4 images per page
+                image(msidata.cv.pls, model = list(ncomp = ncomp_max), layout = c(2, 2))
+
+                par(opar)   
+                ## print table with summary in pdf
+                plot(0,type='n',axes=FALSE,ann=FALSE)
+                title(main="Summary for the different components\n", adj=0.5)
+                ## paginate the summary table: at most 20 rows (4 components) per pdf page
+                rows_per_page = 20
+                number_pages = ceiling(nrow(summary_df)/rows_per_page)
+                for (page in seq_len(number_pages)){
+                    ## the first chunk is drawn on the title page opened above, later chunks need a fresh page
+                    if (page > 1){
+                        plot(0,type='n',axes=FALSE,ann=FALSE)
+                    }
+                    first_row = (page-1)*rows_per_page+1
+                    ## clamp the last page to the table length, otherwise NA rows appear in the table
+                    last_row = min(page*rows_per_page, nrow(summary_df))
+                    grid.table(summary_df[first_row:last_row,], rows= NULL)
+                }
+
+                ## optional output as .RData
+                #if $output_rdata:
+                save(msidata.cv.pls, file="$classification_rdata")
+                #end if
+            ######################## PLS - analysis ###########################
+            #elif str( $type_cond.method_cond.analysis_cond.PLS_method) == "PLS_analysis":
+                print("PLS analysis")
+
+                ## number of components
+                component = c($type_cond.method_cond.analysis_cond.pls_comp)
+
+                ### pls analysis
+                msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale)
+
+                ### plot of PLS coefficients
+                plot(msidata.pls, main="PLS coefficients per m/z")
+
+                ### summary table of PLS
+                summary_table = summary(msidata.pls)\$accuracy[[paste0("ncomp = ",component)]]
+                summary_table = cbind(rownames(summary_table), data.frame(summary_table))
+                rownames(summary_table) = NULL
+print(summary_table)
+                ###plot(0,type='n',axes=FALSE,ann=FALSE)
+                ###grid.table(test, rows= TRUE)
+
+                ### image of the best m/z
+                print(image(msidata, mz = topLabels(msidata.pls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap"))
+
+                ## m/z and pixel information output
+                pls_classes = data.frame(msidata.pls\$classes[[1]])
+                rownames(pls_classes) = names(pixels(msidata))
+                colnames(pls_classes) = "predicted diagnosis"
+                pls_toplabels = topLabels(msidata.pls, n=$type_cond.method_cond.analysis_cond.pls_toplabels)
+
+                write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+                write.table(pls_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+
+                ## optional output as .RData
+                #if $output_rdata:
+                save(msidata.pls, file="$classification_rdata")
+                #end if
+
+            #end if
+
+
+        ######################## OPLS #############################
+        #elif str( $type_cond.method_cond.class_method) == "OPLS":
+            print("OPLS")
+
+            ######################## OPLS -CV #############################
+            #if str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_cvapply":
+                print("OPLS cv")
+
+                ## folds
+                #if str($type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_vector) == "opls_fold_internal":
+                    fold_vector = msidata\$$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_name
+                #elif str($type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_vector) == "opls_fold_external":
+                    fold_tabular = read.delim("$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_data", header = FALSE, stringsAsFactors = FALSE)
+                    fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_column])
+                    number_pixels = length(fold_vector) ## should be same as in data
+                #end if
+
+                ## plot of folds
+
+                position_df = cbind(coord(msidata)[,1:2], fold_vector)
+                fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+
+                       geom_tile() +
+                       coord_fixed()+
+                       ggtitle("Distribution of the fold variable")+
+                       theme_bw()+
+                       theme(text=element_text(family="ArialMT", face="bold", size=15))+
+                       theme(legend.position="bottom",legend.direction="vertical")+
+                       guides(fill=guide_legend(ncol=4,byrow=TRUE))
+                coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
+                coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector)
+                print(fold_plot)
+
+                ## number of components
+                components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp)
+
+                ## OPLS-cvApply:
+                msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv)
+
+                ## create table with summary
+                count = 1
+                summary_oplscv = list()
+                accuracy_vector = numeric()
+                for (iteration in components){
+                    summary_iteration = summary(msidata.cv.opls)\$accuracy[[paste0("ncomp = ", iteration)]]
+                    summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table
+                    accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot
+                    empty_row = c(paste0("ncomp = ", iteration), rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration
+                    ##rownames(labeled_iteration)[1] = paste0("ncomp = ", iteration)
+                    ##labeled_iteration = cbind(rownames(labeled_iteration), labeled_iteration)
+                    labeled_iteration = rbind(empty_row, summary_iteration)
+                    summary_oplscv[[count]] = labeled_iteration ## create list with summary table for each component
+                    count = count+1} 
+                ## create dataframe from list
+                summary_oplscv = do.call(rbind, summary_oplscv) 
+                summary_df = as.data.frame(summary_oplscv)
+                rownames(summary_df) = NULL
+
+                ## plots
+                ## plot to find ncomp with highest accuracy
+                plot(summary(msidata.cv.opls), main="Accuracy of OPLS classification")
+                ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy
+                ## one image for each sample/fold, 4 images per page
+                image(msidata.cv.opls, model = list(ncomp = ncomp_max), layout = c(2, 2))
+
+                par(opar)   
+                ## print table with summary in pdf
+                plot(0,type='n',axes=FALSE,ann=FALSE)
+                title(main="Summary for the different components\n", adj=0.5)
+                ## paginate the summary table: at most 20 rows (4 components) per pdf page
+                rows_per_page = 20
+                number_pages = ceiling(nrow(summary_df)/rows_per_page)
+                for (page in seq_len(number_pages)){
+                    ## the first chunk is drawn on the title page opened above, later chunks need a fresh page
+                    if (page > 1){
+                        plot(0,type='n',axes=FALSE,ann=FALSE)
+                    }
+                    first_row = (page-1)*rows_per_page+1
+                    ## clamp the last page to the table length, otherwise NA rows appear in the table
+                    last_row = min(page*rows_per_page, nrow(summary_df))
+                    grid.table(summary_df[first_row:last_row,], rows= NULL)
+                }
+
+                ## optional output as .RData
+                #if $output_rdata:
+                save(msidata.cv.opls, file="$classification_rdata")
+                #end if
+
+            ######################## OPLS -analysis ###########################
+            #elif str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_analysis":
+                print("OPLS analysis")
+
+                ## number of components
+                component = c($type_cond.method_cond.opls_analysis_cond.opls_comp)
+
+                ## opls analysis: fit with OPLS (this branch previously called PLS, so no OPLS model was built;
+                ## keep.Xnew is an OPLS argument according to the Cardinal documentation)
+                msidata.opls <- OPLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew)
+
+                ### plot of OPLS coefficients
+                plot(msidata.opls, main="OPLS coefficients per m/z")
+
+                ### summary table of OPLS
+                summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]]
+                summary_table = cbind(rownames(summary_table), summary_table)
+                rownames(summary_table) = NULL
+                summary_table = data.frame(summary_table)
+                print(summary_table)
+                ###plot(0,type='n',axes=FALSE,ann=FALSE)
+                ###grid.table(test, rows= TRUE)
+
+                ### image of the best m/z
+                print(image(msidata, mz = topLabels(msidata.opls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap"))
+
+                ## m/z and pixel information output
+                opls_classes = data.frame(msidata.opls\$classes[[1]])
+                rownames(opls_classes) = names(pixels(msidata))
+                colnames(opls_classes) = "predicted diagnosis"
+                opls_toplabels = topLabels(msidata.opls, n=$type_cond.method_cond.opls_analysis_cond.opls_toplabels)
+
+                write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+                write.table(opls_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+
+                ## optional output as .RData
+                #if $output_rdata:
+                save(msidata.opls, file="$classification_rdata")
+                #end if
+
+            #end if
+
+        ######################## SSC #############################
+        #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids":
+            print("SSC")
+
+            ######################## SSC - CV #############################
+            #if str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_cvapply":
+                print("SSC cv")
+
+                ## folds
+                #if str($type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_vector) == "ssc_fold_internal":
+                    fold_vector = msidata\$$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_name
+
+                #elif str($type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_vector) == "ssc_fold_external":
+                    fold_tabular = read.delim("$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_data", header = FALSE, stringsAsFactors = FALSE)
+                    fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_column])
+                    number_pixels = length(fold_vector) ## should be same as in data
+                #end if
+
+                ## plot of folds
+                position_df = cbind(coord(msidata)[,1:2], fold_vector)
+                fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+
+                       geom_tile() +
+                       coord_fixed()+
+                       ggtitle("Distribution of the fold variable")+
+                       theme_bw()+
+                       theme(text=element_text(family="ArialMT", face="bold", size=15))+
+                       theme(legend.position="bottom",legend.direction="vertical")+
+                       guides(fill=guide_legend(ncol=4,byrow=TRUE))
+                coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
+                coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector)
+                print(fold_plot)
+
+                ## SSC-cvApply:
+                msidata.cv.ssc <- cvApply(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method")
+
+                ## create table with summary
+                count = 1
+                summary_ssccv = list()
+                accuracy_vector = numeric()
+
+                for (iteration in names(msidata.cv.ssc@resultData[[1]][,1])){
+                    summary_iteration = summary(msidata.cv.ssc)\$accuracy[[iteration]]
+                    summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table
+                    accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot
+                    empty_row = c(iteration, rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration
+                    labeled_iteration = rbind(empty_row, summary_iteration)
+                    summary_ssccv[[count]] = labeled_iteration ## create list with summary table for each component
+                    count = count+1
+                }
+
+                ##create dataframe from list
+                summary_ssccv = do.call(rbind, summary_ssccv) 
+                summary_df = as.data.frame(summary_ssccv)
+                rownames(summary_df) = NULL
+
+                ## plot to find parameters with highest accuracy
+                plot(summary(msidata.cv.ssc), main="Accuracy of SSC classification")
+                best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy
+                r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4))
+                s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space
+
+                image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), layout=c(2,2))
+
+                par(opar)
+                ## print table with summary in pdf
+                plot(0,type='n',axes=FALSE,ann=FALSE)
+                title(main="Summary for the different parameters\n", adj=0.5)
+                ## paginate the summary table: at most 20 rows (4 parameter sets) per pdf page
+                rows_per_page = 20
+                number_pages = ceiling(nrow(summary_df)/rows_per_page)
+                for (page in seq_len(number_pages)){
+                    ## the first chunk is drawn on the title page opened above, later chunks need a fresh page
+                    if (page > 1){
+                        plot(0,type='n',axes=FALSE,ann=FALSE)
+                    }
+                    first_row = (page-1)*rows_per_page+1
+                    ## clamp the last page to the table length, otherwise NA rows appear in the table
+                    last_row = min(page*rows_per_page, nrow(summary_df))
+                    grid.table(summary_df[first_row:last_row,], rows= NULL)
+                }
+
+                ## optional output as .RData: store the SSC cross validation result of this branch
+                ## (the OPLS result object does not exist here)
+                #if $output_rdata:
+                save(msidata.cv.ssc, file="$classification_rdata")
+                #end if
+
+            ######################## SSC -analysis ###########################
+            #elif str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_analysis":
+                print("SSC analysis")
+
+                ## SSC analysis
+                ## no fold argument is passed: folds are only defined in the cross validation branch,
+                ## so fold_vector does not exist when this branch runs
+                msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method")
+
+                plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s)))
+
+                ### summary table SSC
+
+                ##summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]]
+                summary_table = summary(msidata.ssc)
+print(summary_table)
+                ##summary_table = cbind(rownames(summary_table), summary_table)
+                ##rownames(summary_table) = NULL
+
+                ###plot(0,type='n',axes=FALSE,ann=FALSE)
+                ###grid.table(summary_table, rows= TRUE)
+
+                ### image of the best m/z
+                print(image(msidata, mz = topLabels(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap"))
+
+                ## m/z and pixel information output
+                ssc_classes = data.frame(msidata.ssc\$classes[[1]])
+                rownames(ssc_classes) = names(pixels(msidata))
+                colnames(ssc_classes) = "predicted diagnosis"
+                ssc_toplabels = topLabels(msidata.ssc)
+
+                write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+                write.table(ssc_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+
+                ## optional output as .RData
+                #if $output_rdata:
+                save(msidata.ssc, file="$classification_rdata")
+                #end if
+
+            #end if
+        #end if
+
+
+    ######################## II) Prediction #############################
+    #############################################################################
+    #elif str( $type_cond.type_method) == "prediction":
+        print("prediction")
+
+        #if str($type_cond.new_y.new_y_values) == "no_new_y": 
+            new_y_vector = FALSE
+        #elif str($type_cond.new_y.new_y_values) == "new_y_internal":
+            new_y_vector = msidata\$$type_cond.new_y.new_y_name
+        #elif str($type_cond.new_y.new_y_values) == "new_y_external":
+            
+            new_y_tabular = read.delim("$type_cond.new_y.new_y_data", header = FALSE, stringsAsFactors = FALSE)
+            new_y_vector = new_y_tabular[,$type_cond.new_y.new_y_column]
+            number_pixels = length(new_y_vector) ## should be same as in data
+        #end if
+
+        training_data = loadRData("$type_cond.training_result")
+        prediction = predict(training_data,msidata, newy = new_y_vector)
+
+        ## optional output as .RData
+        #if $output_rdata:
+        msidata = prediction
+        save(msidata, file="$classification_rdata")
+        #end if
+    #end if
+
+    dev.off()
+}else{
+    print("Inputfile has no intensities > 0")
+    dev.off()
+}
+
+    ]]></configfile>
+    </configfiles>
+    <inputs>
+        <!-- MSI dataset to classify: composite imzML, composite Analyze7.5, or an RData file -->
+        <param name="infile"
+               type="data"
+               format="imzml, rdata, analyze75"
+               label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
+               help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
+        <!-- Binning settings that are only requested when the user declares a "processed" imzML input -->
+        <conditional name="processed_cond">
+            <param name="processed_file" type="select" label="Is the input file a processed imzML file?">
+                <option value="no_processed" selected="True">not a processed imzML</option>
+                <option value="processed">processed imzML</option>
+            </param>
+            <when value="no_processed"/>
+            <when value="processed">
+                <!-- accuracy and units together define the m/z bin width; ppm is preselected -->
+                <param name="accuracy" type="float" value="50" label="Mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/>
+                <param name="units" display="radio" type="select" label="Unit of the mass accuracy" help="either m/z or ppm">
+                    <option value="mz" >mz</option>
+                    <option value="ppm" selected="True" >ppm</option>
+                </param>
+            </when>
+        </conditional>
+
+        <conditional name="type_cond">
+            <param name="type_method" type="select" label="Analysis step to perform">
+                <option value="training" selected="True">training</option>
+                <option value="prediction">prediction</option>
+            </param>
+            <when value="training">
+
+                <conditional name="method_cond">
+                    <param name="class_method" type="select" label="Select the method for classification">
+                        <option value="PLS" selected="True">PLS</option>
+                        <option value="OPLS">OPLS</option>
+                        <option value="spatialShrunkenCentroids">spatial shrunken centroids</option>
+                    </param>
+                    <when value="PLS">
+                        <!-- PLS is offered as two steps: cross-validation (cvApply) or the analysis itself -->
+                        <conditional name="analysis_cond">
+                            <param name="PLS_method" type="select" label="Crossvalidation or analysis">
+                                <option value="cvapply" selected="True">cvApply</option>
+                                <option value="PLS_analysis">PLS analysis</option>
+                            </param>
+                            <when value="cvapply">
+                                <!-- free text so that several component counts can be given at once -->
+                                <param name="plscv_comp" type="text" value="1:2"
+                                       label="The number of PLS components" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
+                                <!-- the fold assignment is taken either from a pData slot of the dataset or from a tabular column -->
+                                <conditional name="fold_cond">
+                                    <param name="fold_vector" type="select" label="Define the fold variable">
+                                        <option value="fold_internal" selected="True">dataset contains already fold</option>
+                                        <option value="fold_external">use fold from tabular file</option>
+                                    </param>
+                                    <when value="fold_internal">
+                                        <param name="fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/>
+                                    </when>
+                                    <when value="fold_external">
+                                        <param name="fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/>
+                                        <param name="fold_column" data_ref="fold_data" label="Column with folds" type="data_column"/>
+                                    </when>
+                                </conditional>
+                            </when>
+
+                            <when value="PLS_analysis">
+                                <!-- a single component count, intended to be the one chosen from a previous cvApply run -->
+                                <param name="pls_comp" type="integer" value="5"
+                                       label="The optimal number of PLS components as indicated by cross-validations" help="Run cvApply first to obtain the optimal number of PLS components"/>
+                                <param name="pls_scale" type="boolean" display="radio" label="data scaling" truevalue="TRUE" falsevalue="FALSE"/>
+                                <param name="pls_toplabels" type="integer" value="100"
+                                       label="Number of toplabels (masses) which should be written in tabular output"/>
+                            </when>
+                        </conditional>
+                    </when>
+
+                    <when value="OPLS">
+                        <!-- OPLS mirrors the PLS branch: cross-validation (cvApply) or the analysis itself -->
+                        <conditional name="opls_analysis_cond">
+                            <param name="opls_method" type="select" label="Analysis step to perform">
+                                <option value="opls_cvapply" selected="True">cvApply</option>
+                                <option value="opls_analysis">OPLS analysis</option>
+                            </param>
+
+                            <when value="opls_cvapply">
+                                <!-- free text so that several component counts can be given at once -->
+                                <param name="opls_cvcomp" type="text" value="1:2"
+                                       label="The number of OPLS components" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
+                                <param name="xnew_cv" type="boolean" display="radio" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/>
+                                <!-- the fold assignment is taken either from a pData slot of the dataset or from a tabular column -->
+                                <conditional name="opls_fold_cond">
+                                    <param name="opls_fold_vector" type="select" label="Define the fold variable">
+                                        <option value="opls_fold_internal" selected="True">dataset contains already fold</option>
+                                        <option value="opls_fold_external">use fold from tabular file</option>
+                                    </param>
+                                    <when value="opls_fold_internal">
+                                        <param name="opls_fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/>
+                                    </when>
+                                    <when value="opls_fold_external">
+                                        <param name="opls_fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/>
+                                        <param name="opls_fold_column" data_ref="opls_fold_data" label="Column with folds" type="data_column"/>
+                                    </when>
+                                </conditional>
+                            </when>
+
+                            <when value="opls_analysis">
+                                <!-- wording now says OPLS, matching the opls_cvcomp label of the cvApply case -->
+                                <param name="opls_comp" type="integer" value="5"
+                                       label="The optimal number of OPLS components as indicated by cross-validations" help="Run cvApply first to obtain the optimal number of OPLS components"/>
+                                <param name="xnew" type="boolean" display="radio" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/>
+                                <param name="opls_scale" type="select" label="data scaling" display="radio" optional="False">
+                                    <option value="TRUE">yes</option>
+                                    <option value="FALSE" selected="True">no</option>
+                                </param>
+                                <param name="opls_toplabels" type="integer" value="100"
+                                       label="Number of toplabels (features) which should be written in tabular output"/>
+                            </when>
+                        </conditional>
+                    </when>
+
+                    <when value="spatialShrunkenCentroids">
+                        <!-- step selector: cross-validation (cvApply) or the analysis itself -->
+                        <conditional name="ssc_analysis_cond">
+                            <param name="ssc_method" type="select" label="Analysis step to perform">
+                                <option value="ssc_cvapply" selected="True">cvApply</option>
+                                <option value="ssc_analysis">spatial shrunken centroids analysis</option>
+                            </param>
+                            <when value="ssc_cvapply">
+                                <!-- the fold assignment is taken either from a pData slot of the dataset or from a tabular column -->
+                                <conditional name="ssc_fold_cond">
+                                    <param name="ssc_fold_vector" type="select" label="Define the fold variable">
+                                        <option value="ssc_fold_internal" selected="True">dataset contains already fold</option>
+                                        <option value="ssc_fold_external">use fold from tabular file</option>
+                                    </param>
+                                    <when value="ssc_fold_internal">
+                                        <param name="ssc_fold_name"
+                                               type="text"
+                                               value="sample"
+                                               label="Name of the pData slot where fold is stored"
+                                               help="each fold must contain pixels of all categories"/>
+                                    </when>
+                                    <when value="ssc_fold_external">
+                                        <param name="ssc_fold_data"
+                                               type="data"
+                                               format="tabular"
+                                               label="Tabular file with column for folds"
+                                               help="Number of rows must be number of pixels"/>
+                                        <param name="ssc_fold_column" type="data_column" data_ref="ssc_fold_data" label="Column with folds"/>
+                                    </when>
+                                </conditional>
+                            </when>
+                            <when value="ssc_analysis">
+                                <param name="ssc_toplabels"
+                                       type="integer"
+                                       value="100"
+                                       label="Number of toplabels (features) which should be written in tabular output"/>
+                            </when>
+                        </conditional>
+                        <!-- r, s and the kernel method sit outside ssc_analysis_cond, so they are asked for both steps -->
+                        <param name="ssc_r"
+                               type="text"
+                               value="2"
+                               label="The spatial neighborhood radius of nearby pixels to consider (r)"
+                               help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
+                        <param name="ssc_s"
+                               type="text"
+                               value="2"
+                               label="The sparsity thresholding parameter by which to shrink the t-statistics (s)"
+                               help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
+                        <param name="ssc_kernel_method"
+                               type="select"
+                               display="radio"
+                               label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights">
+                            <option value="gaussian">gaussian</option>
+                            <option value="adaptive" selected="True">adaptive</option>
+                        </param>
+                    </when>
+                </conditional>
+                <!-- response variable y for training: a pData slot of the dataset or a column of a tabular file -->
+                <conditional name="y_cond">
+                    <param name="y_vector" type="select" label="Define the response variable y">
+                        <option value="y_internal" selected="True">dataset contains already y</option>
+                        <option value="y_external">use y from tabular file</option>
+                    </param>
+                    <when value="y_internal">
+                        <param name="y_name"
+                               type="text"
+                               value="combined_sample"
+                               label="Name of the pData slot where y is stored"
+                               help="Outputs of MSI_combine tool have 'combined_sample' as name"/>
+                    </when>
+                    <when value="y_external">
+                        <param name="y_data" type="data" format="tabular" label="Tabular file with column for y response"/>
+                        <param name="y_column" type="data_column" data_ref="y_data" label="Column with y response"/>
+                    </when>
+                </conditional>
+            </when>
+
+            <when value="prediction">
+                <!-- RData dataset produced by an earlier training run of this tool -->
+                <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/>
+                <!-- optional new response: none, a pData slot of the dataset, or a column of a tabular file -->
+                <conditional name="new_y">
+                    <param name="new_y_values" type="select" label="Define the new response y">
+                        <option value="no_new_y" >no new y response</option>
+                        <option value="new_y_internal" selected="True">dataset contains already y</option>
+                        <option value="new_y_external">use y from tabular file</option>
+                    </param>
+                    <when value="no_new_y"/>
+                    <when value="new_y_internal">
+                        <param name="new_y_name"
+                               type="text"
+                               value="combined_sample"
+                               label="Name of the pData slot where y is stored"
+                               help="data merged with MSI_combine tool has 'combined_sample' as name"/>
+                    </when>
+                    <when value="new_y_external">
+                        <param name="new_y_data" type="data" format="tabular" label="Tabular file with column for y response"/>
+                        <param name="new_y_column" type="data_column" data_ref="new_y_data" label="Column with y response"/>
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+        <!-- switch read by the classification_rdata output filter and by the "#if $output_rdata" guards in the script -->
+        <param name="output_rdata"
+               type="boolean"
+               display="radio"
+               label="Results as .RData output"/>
+    </inputs>
+    <!-- The PDF and both tabular datasets are always declared; the RData dataset is filtered on output_rdata -->
+    <outputs>
+        <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label="$infile.display_name classification"/>
+        <data format="tabular" name="mzfeatures" label="$infile.display_name features"/>
+        <data format="tabular" name="pixeloutput" label="$infile.display_name pixels"/>
+        <!-- label differs from the PDF's so that the two history items can be told apart by name -->
+        <data format="rdata" name="classification_rdata" label="$infile.display_name classification rdata">
+            <filter>output_rdata</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test 1: PLS cvApply with fold and y both read from pixel_annotation_file1.tabular -->
+        <test expect_num_outputs="3">
+            <param name="infile" ftype="rdata" value="testfile_squares.rdata"/>
+            <conditional name="type_cond">
+                <param name="type_method" value="training"/>
+                <conditional name="method_cond">
+                    <param name="class_method" value="PLS"/>
+                    <conditional name="analysis_cond">
+                        <param name="PLS_method" value="cvapply"/>
+                        <param name="plscv_comp" value="2:4"/>
+                        <conditional name="fold_cond">
+                            <param name="fold_vector" value="fold_external"/>
+                            <param name="fold_data" ftype="tabular" value="pixel_annotation_file1.tabular"/>
+                            <param name="fold_column" value="1"/>
+                        </conditional>
+                    </conditional>
+                </conditional>
+                <conditional name="y_cond">
+                    <param name="y_vector" value="y_external"/>
+                    <param name="y_data" ftype="tabular" value="pixel_annotation_file1.tabular"/>
+                    <param name="y_column" value="2"/>
+                </conditional>
+            </conditional>
+            <output name="classification_images" file="test1.pdf" compare="sim_size" delta="20000"/>
+            <output name="mzfeatures" file="features_test1.tabular"/>
+            <output name="pixeloutput" file="pixels_test1.tabular"/>
+        </test>
+
+        <!-- Test 2: PLS analysis with y from a tabular file and the optional RData output enabled.
+             Only params that exist under the PLS_analysis case are supplied; fold_cond belongs to the cvapply case. -->
+        <test expect_num_outputs="4">
+            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
+            <conditional name="type_cond">
+                <param name="type_method" value="training"/>
+                <conditional name="method_cond">
+                    <param name="class_method" value="PLS"/>
+                    <conditional name="analysis_cond">
+                        <param name="PLS_method" value="PLS_analysis"/>
+
+                        <param name="pls_comp" value="2"/>
+                        <param name="pls_scale" value="TRUE"/>
+                        <param name="pls_toplabels" value="100"/>
+
+                    </conditional>
+                </conditional>
+                <conditional name="y_cond">
+                    <param name="y_vector" value="y_external"/>
+                    <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/>
+                    <param name="y_column" value="2"/>
+                </conditional>
+            </conditional>
+            <param name="output_rdata" value="True"/>
+            <output name="mzfeatures" file="features_test2.tabular"/>
+            <output name="pixeloutput" file="pixels_test2.tabular"/>
+            <output name="classification_images" file="test2.pdf" compare="sim_size" delta="20000"/>
+            <output name="classification_rdata" file="test2.rdata" compare="sim_size" />
+        </test>
+
+        <!-- Test 3: OPLS training with y from random_factors.tabular, three outputs expected (no RData).
+             NOTE(review): opls_method selects "opls_analysis", yet opls_cvcomp, xnew_cv and opls_fold_cond
+             are declared only under the "opls_cvapply" case in the inputs section, and none of the
+             opls_analysis params are set here. This looks like it was meant to exercise cvApply;
+             confirm against features_test3.tabular / pixels_test3.tabular before changing the value. -->
+        <test expect_num_outputs="3">
+            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
+            <conditional name="type_cond">
+                <param name="type_method" value="training"/>
+                <conditional name="method_cond">
+                    <param name="class_method" value="OPLS"/>
+                    <conditional name="opls_analysis_cond">
+                        <param name="opls_method" value="opls_analysis"/>
+
+                        <param name="opls_cvcomp" value="1:2"/>
+                        <param name="xnew_cv" value="FALSE"/>
+                        <conditional name="opls_fold_cond">
+                            <param name="opls_fold_vector" value="opls_fold_external"/>
+                            <param name="opls_fold_data" ftype="tabular" value="random_factors.tabular"/>
+                            <param name="opls_fold_column" value="1"/>
+                        </conditional>
+                    </conditional>
+                </conditional>
+                <conditional name="y_cond">
+                    <param name="y_vector" value="y_external"/>
+                    <param name="y_data" value="random_factors.tabular" ftype="tabular"/>
+                    <param name="y_column" value="2"/>
+                </conditional>
+            </conditional>
+            <output name="mzfeatures" file="features_test3.tabular"/>
+            <output name="pixeloutput" file="pixels_test3.tabular"/>
+            <output name="classification_images" file="test3.pdf" compare="sim_size" delta="20000"/>
+        </test>
+
+        <!-- Test 4: OPLS analysis with y from random_factors.tabular and the optional RData output enabled -->
+        <test expect_num_outputs="4">
+            <param name="infile" ftype="rdata" value="testfile_squares.rdata"/>
+            <conditional name="type_cond">
+                <param name="type_method" value="training"/>
+                <conditional name="method_cond">
+                    <param name="class_method" value="OPLS"/>
+                    <conditional name="opls_analysis_cond">
+                        <param name="opls_method" value="opls_analysis"/>
+                        <param name="opls_comp" value="3"/>
+                        <param name="xnew" value="FALSE"/>
+                        <param name="opls_scale" value="FALSE"/>
+                        <param name="opls_toplabels" value="100"/>
+                    </conditional>
+                </conditional>
+                <conditional name="y_cond">
+                    <param name="y_vector" value="y_external"/>
+                    <param name="y_data" ftype="tabular" value="random_factors.tabular"/>
+                    <param name="y_column" value="2"/>
+                </conditional>
+            </conditional>
+            <param name="output_rdata" value="True"/>
+            <output name="classification_images" file="test4.pdf" compare="sim_size" delta="20000"/>
+            <output name="classification_rdata" file="test4.rdata" compare="sim_size"/>
+            <output name="mzfeatures" file="features_test4.tabular"/>
+            <output name="pixeloutput" file="pixels_test4.tabular"/>
+        </test>
+
+        <!-- Test 5: spatial shrunken centroids cvApply with fold and y from pixel_annotation_file1.tabular.
+             ssc_r, ssc_s and ssc_kernel_method are placed next to ssc_analysis_cond, which is where the
+             inputs section declares them (test 6 uses the same layout). -->
+        <test expect_num_outputs="3">
+            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
+            <conditional name="type_cond">
+                <param name="type_method" value="training"/>
+                <conditional name="method_cond">
+                    <param name="class_method" value="spatialShrunkenCentroids"/>
+                    <conditional name="ssc_analysis_cond">
+                        <param name="ssc_method" value="ssc_cvapply"/>
+                        <conditional name="ssc_fold_cond">
+                            <param name="ssc_fold_vector" value="ssc_fold_external"/>
+                            <param name="ssc_fold_data" value="pixel_annotation_file1.tabular" ftype="tabular"/>
+                            <param name="ssc_fold_column" value="1"/>
+                        </conditional>
+                    </conditional>
+                    <param name="ssc_r" value="1:2"/>
+                    <param name="ssc_s" value="2:3"/>
+                    <param name="ssc_kernel_method" value="adaptive"/>
+                </conditional>
+                <conditional name="y_cond">
+                    <param name="y_vector" value="y_external"/>
+                    <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/>
+                    <param name="y_column" value="2"/>
+                </conditional>
+            </conditional>
+            <output name="mzfeatures" file="features_test5.tabular"/>
+            <output name="pixeloutput" file="pixels_test5.tabular"/>
+            <output name="classification_images" file="test5.pdf" compare="sim_size" delta="20000"/>
+        </test>
+
+        <!-- Test 6: spatial shrunken centroids analysis with y from random_factors.tabular and RData output enabled -->
+        <test expect_num_outputs="4">
+            <param name="infile" ftype="rdata" value="testfile_squares.rdata"/>
+            <conditional name="type_cond">
+                <param name="type_method" value="training"/>
+                <conditional name="method_cond">
+                    <param name="class_method" value="spatialShrunkenCentroids"/>
+                    <conditional name="ssc_analysis_cond">
+                        <param name="ssc_method" value="ssc_analysis"/>
+                        <param name="ssc_toplabels" value="100"/>
+                    </conditional>
+                    <param name="ssc_r" value="2"/>
+                    <param name="ssc_s" value="2"/>
+                    <param name="ssc_kernel_method" value="adaptive"/>
+                </conditional>
+                <conditional name="y_cond">
+                    <param name="y_vector" value="y_external"/>
+                    <param name="y_data" ftype="tabular" value="random_factors.tabular"/>
+                    <param name="y_column" value="2"/>
+                </conditional>
+            </conditional>
+            <param name="output_rdata" value="True"/>
+            <output name="classification_images" file="test6.pdf" compare="sim_size" delta="20000"/>
+            <output name="classification_rdata" file="test6.rdata" compare="sim_size"/>
+            <output name="mzfeatures" file="features_test6.tabular"/>
+            <output name="pixeloutput" file="pixels_test6.tabular"/>
+        </test>
+
+        <!-- Test 7: prediction using test2.rdata as training result and a new y from pixel_annotation_file1.tabular -->
+        <test expect_num_outputs="4">
+            <param name="infile" ftype="rdata" value="testfile_squares.rdata"/>
+            <conditional name="type_cond">
+                <param name="type_method" value="prediction"/>
+                <param name="training_result" ftype="rdata" value="test2.rdata"/>
+                <conditional name="new_y">
+                    <param name="new_y_values" value="new_y_external"/>
+                    <param name="new_y_data" ftype="tabular" value="pixel_annotation_file1.tabular"/>
+                    <param name="new_y_column" value="2"/>
+                </conditional>
+            </conditional>
+            <param name="output_rdata" value="True"/>
+            <output name="classification_images" file="test7.pdf" compare="sim_size" delta="20000"/>
+            <output name="classification_rdata" file="test7.rdata" compare="sim_size"/>
+            <output name="mzfeatures" file="features_test7.tabular"/>
+            <output name="pixeloutput" file="pixels_test7.tabular"/>
+        </test>
+
+    </tests>
+    <help>
+        <![CDATA[
+
+Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org/>`_
+
+This tool provides three different Cardinal functions for supervised classification of mass-spectrometry imaging data.
+
+Input data: 3 types of input data can be used:
+
+- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
+- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
+- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
+
+Options: 
+
+- PLS(-DA): partial least square (discriminant analysis)
+- O-PLS(-DA): Orthogonal partial least squares (discriminant analysis)
+- Spatial shrunken centroids
+
+Output: 
+
+- Pdf with the heatmaps and plots for the classification
+- Two tabular files with information on masses (features) and pixels: toplabels/classes (PLS, spatial shrunken centroids)
+- optional RData output to further explore the results with Cardinal in R
+
+        ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv146</citation>
+    </citations>
+</tool>