comparison classification.xml @ 19:4c177985028a draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 91e77c139cb3b7c6d67727dc39140dd79355fa0c
author galaxyp
date Thu, 04 Jul 2024 13:45:03 +0000
parents eddc2ae2db80
children
comparison
equal deleted inserted replaced
18:0a18ac48ac53 19:4c177985028a
1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.0"> 1 <tool id="cardinal_classification" name="MSI classification" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
2 <description>spatial classification of mass spectrometry imaging data</description> 2 <description>spatial classification of mass spectrometry imaging data</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"> 6 <expand macro="requirements"/>
7 <requirement type="package" version="2.3">r-gridextra</requirement>
8 <requirement type="package" version="3.3.5">r-ggplot2</requirement>
9 </expand>
10 <command detect_errors="exit_code"> 7 <command detect_errors="exit_code">
11 <![CDATA[ 8 <![CDATA[
12 9
13 @INPUT_LINKING@ 10 @INPUT_LINKING@
14 cat '${MSI_segmentation}' && 11 cat '${MSI_segmentation}' &&
15 Rscript '${MSI_segmentation}' 12 Rscript '${MSI_segmentation}'
16 13
25 library(Cardinal) 22 library(Cardinal)
26 library(gridExtra) 23 library(gridExtra)
27 library(ggplot2) 24 library(ggplot2)
28 library(scales) 25 library(scales)
29 26
27
28
30 @READING_MSIDATA@ 29 @READING_MSIDATA@
31 30
32 msidata = as(msidata, "MSImageSet") ##coercion to MSImageSet 31
33 32 msidata = as(msidata, "MSImagingExperiment")
34 33
35 ## remove duplicated coordinates 34 ## remove duplicated coordinates
36 msidata <- msidata[,!duplicated(coord(msidata))] 35 msidata <- msidata[,!duplicated(coord(msidata))]
37 36
38 @DATA_PROPERTIES_INRAM@ 37 @DATA_PROPERTIES_INRAM@
60 ################################################################################ 59 ################################################################################
61 60
62 ## table with values 61 ## table with values
63 grid.table(property_df, rows= NULL) 62 grid.table(property_df, rows= NULL)
64 63
65 64 int_matrix = as.matrix(spectra(msidata))
66 if (npeaks > 0 && sum(is.na(spectra(msidata)))==0){ 65 NAcount = sum(is.na(int_matrix))
66
67
68 if (npeaks > 0 && NAcount==0){
67 69
68 opar <- par() 70 opar <- par()
69 71
70 ######################## II) Training ####################################### 72 ######################## II) Training #######################################
71 ############################################################################# 73 #############################################################################
73 print("training") 75 print("training")
74 76
75 77
76 ## load y response (will be needed in every training scenario) 78 ## load y response (will be needed in every training scenario)
77 79
78 y_tabular = read.delim("$type_cond.annotation_file", header = $type_cond.tabular_header, stringsAsFactors = FALSE) 80 y_tabular = read.delim("$type_cond.annotation_file", header = $type_cond.tabular_header, stringsAsFactors = FALSE)
79 81
80 #if str($type_cond.column_fold) == "None": 82 #if str($type_cond.column_fold) == "None":
81 y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response)] 83 y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response)]
82 #else 84 #else
83 y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response, $type_cond.column_fold)] 85 y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response, $type_cond.column_fold)]
84 #end if 86 #end if
85 87 colnames(y_input)[1:2] = c("x", "y")
86 colnames(y_input)[1:2] = c("x", "y") 88
87 ## merge with coordinate information of msidata 89 ## merge with coordinate information of msidata
88 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) 90 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata)))
89 colnames(msidata_coordinates)[3] = "pixel_index" 91 colnames(msidata_coordinates)[3] = "pixel_index"
90 merged_response = merge(msidata_coordinates, y_input, by=c("x", "y"), all.x=TRUE) 92 merged_response = as.data.frame(merge(msidata_coordinates, y_input, by=c("x", "y"), all.x=TRUE))
91 merged_response[is.na(merged_response)] = "NA" 93 merged_response[is.na(merged_response)] = "NA"
92 merged_response = merged_response[order(merged_response\$pixel_index),] 94 merged_response = merged_response[order(merged_response\$pixel_index),]
93 conditions = as.factor(merged_response[,4]) 95 conditions = as.factor(merged_response[,4])
94 y_vector = conditions 96 y_vector = conditions
95 97
96 ## colours selection: 98 ## colours selection:
97 99
98 #if str($colour_conditional.colour_type) == "manual_colour" 100 #if str($colour_conditional.colour_type) == "manual_colour"
99 #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours]) 101 #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours])
100 colourvector = c($color_string) 102 colourvector = c($color_string)
101 103
102 #elif str($colour_conditional.colour_type) == "colourpalette" 104 #elif str($colour_conditional.colour_type) == "colourpalette"
103 number_levels = (length(levels(conditions))) 105 number_levels = (length(levels(conditions)))
104 colourvector = noquote($colour_conditional.palettes)(number_levels) 106 colourvector = noquote($colour_conditional.palettes)(number_levels)
105 107
106 #end if 108 #end if
107 109
108 ## plot of y vector 110
109 111 ## plot of y vector
110 position_df = cbind(coord(msidata)[,1:2], conditions) 112
111 y_plot = ggplot(position_df, aes(x=x, y=y, fill=conditions))+ 113 position_df = as.data.frame(cbind(coord(msidata)[,1:2], conditions))
114 y_plot = ggplot(position_df, aes(x=x, y=y, fill=conditions))+
112 geom_tile() + 115 geom_tile() +
113 coord_fixed()+ 116 coord_fixed()+
114 ggtitle("Distribution of the conditions")+ 117 ggtitle("Distribution of the conditions")+
115 theme_bw()+ 118 theme_bw()+
116 theme( 119 theme(
117 plot.background = element_blank(), 120 plot.background = element_blank(),
118 panel.grid.major = element_blank(), 121 panel.grid.major = element_blank(),
119 panel.grid.minor = element_blank())+ 122 panel.grid.minor = element_blank())+
120 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 123 theme(text=element_text(family="ArialMT", face="bold", size=15))+
121 theme(legend.position="bottom",legend.direction="vertical")+ 124 theme(legend.position="bottom",legend.direction="vertical")+
122 guides(fill=guide_legend(ncol=4,byrow=TRUE))+ 125 guides(fill=guide_legend(ncol=4,byrow=TRUE))+
123 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) 126 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector)
124 coord_labels = aggregate(cbind(x,y)~conditions, data=position_df, mean, na.rm=TRUE, na.action="na.pass") 127 coord_labels = aggregate(cbind(x,y)~conditions, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
125 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$conditions) 128 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$conditions)
126 print(y_plot) 129 print(y_plot)
127 130
128 131 ## plot of folds
129 ## plot of folds 132
130 133 #if str($type_cond.column_fold) != "None":
131 #if str($type_cond.column_fold) != "None": 134 fold_vector = as.factor(merged_response[,5])
132 fold_vector = as.factor(merged_response[,5]) 135
133 136 position_df = as.data.frame(cbind(coord(msidata)[,1:2], fold_vector))
134 137 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+
135 position_df = cbind(coord(msidata)[,1:2], fold_vector)
136 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+
137 geom_tile() + 138 geom_tile() +
138 coord_fixed()+ 139 coord_fixed()+
139 ggtitle("Distribution of the fold variable")+ 140 ggtitle("Distribution of the fold variable")+
140 theme_bw()+ 141 theme_bw()+
141 theme( 142 theme(
142 plot.background = element_blank(), 143 plot.background = element_blank(),
143 panel.grid.major = element_blank(), 144 panel.grid.major = element_blank(),
144 panel.grid.minor = element_blank())+ 145 panel.grid.minor = element_blank())+
145 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 146 theme(text=element_text(family="ArialMT", face="bold", size=15))+
146 theme(legend.position="bottom",legend.direction="vertical")+ 147 theme(legend.position="bottom",legend.direction="vertical")+
147 guides(fill=guide_legend(ncol=4,byrow=TRUE)) 148 guides(fill=guide_legend(ncol=4,byrow=TRUE))
148 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") 149 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
149 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) 150 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector)
150 print(fold_plot) 151 print(fold_plot)
151 152
152 #end if 153 #end if
153 154
154 ######################## PLS ############################# 155 ######################## PLS #############################
155 #if str( $type_cond.method_cond.class_method) == "PLS": 156 #if str( $type_cond.method_cond.class_method) == "PLS":
156 print("PLS") 157 print("PLS")
157 158
162 ## set variables for components and number of response groups 163 ## set variables for components and number of response groups
163 components = c($type_cond.method_cond.analysis_cond.plscv_comp) 164 components = c($type_cond.method_cond.analysis_cond.plscv_comp)
164 number_groups = length(levels(y_vector)) 165 number_groups = length(levels(y_vector))
165 166
166 ## PLS-cvApply: 167 ## PLS-cvApply:
167 msidata.cv.pls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components) 168 msidata.cv.pls <- crossValidate(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components)
168 169
169 ## remove msidata to clean up RAM space 170 ## remove msidata to clean up RAM space
170 rm(msidata) 171 rm(msidata)
171 gc() 172 gc()
172 173
173 ## create table with summary 174 ## create new summary table with cv results
174 count = 1 175 results_list <- NULL
175 summary_plscv = list() 176 for (i in seq_along(components)) {
176 accuracy_vector = numeric() 177 ## extract accuracy, sensitivity, and specificity for the current i
177 for (iteration in components){ 178 accuracy <- round(as.data.frame(msidata.cv.pls@resultData@listData[[i]][["accuracy"]]), digits=2)
178 summary_iteration = summary(msidata.cv.pls)\$accuracy[[paste0("ncomp = ", iteration)]] 179 sensitivity <- round(as.data.frame(msidata.cv.pls@resultData@listData[[i]][["sensitivity"]]), digits=2)
179 ## change class of numbers into numeric to round and calculate mean 180 specificity <- round(as.data.frame(msidata.cv.pls@resultData@listData[[i]][["specificity"]]), digits=2)
180 summary_iteration2 = round(as.numeric(summary_iteration), digits=2) 181
181 summary_matrix = matrix(summary_iteration2, nrow=4, ncol=number_groups) 182 ## combine accuracy, sensitivity, and specificity into one data frame
182 accuracy_vector[count] = mean(summary_matrix[1,]) ## vector with accuracies to find later maximum for plot 183 result_df <- cbind(folds = rownames(accuracy), ncomp = i, accuracy, sensitivity, specificity)
183 summary_iteration3 = cbind(rownames(summary_iteration), summary_matrix) ## include rownames in table 184 colnames(result_df) <- c("folds", "ncomp", "accuracy", "sensitivity", "specificity")
184 summary_iteration4 = t(summary_iteration3) 185 rownames(result_df) <- NULL
185 summary_iteration5 = cbind(c(paste0("ncomp = ", iteration), colnames(summary_iteration)), summary_iteration4) 186
186 summary_plscv[[count]] = summary_iteration5 187 ## add column names with ncomp as first row to each dataframe
187 count = count+1} ## create list with summary table for each component 188 col_names_row <- data.frame(folds = "folds", ncomp = paste0("ncomp", i), accuracy = "accuracy", sensitivity = "sensitivity", specificity = "specificity")
188 summary_plscv = do.call(rbind, summary_plscv) 189 result_df <- rbind(col_names_row, result_df)
189 summary_df = as.data.frame(summary_plscv) 190
190 colnames(summary_df) = NULL 191 results_list[[i]] <- result_df
191 192 }
192 ## plots 193
193 ## plot to find ncomp with highest accuracy 194 ## combine all data frames in the list into one data frame
194 plot(components, accuracy_vector, ylab = "mean accuracy",type="o", main="Mean accuracy of PLS classification") 195 results_df <- do.call(rbind, results_list)
195 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy 196
196 ## one image for each sample/fold, 4 images per page 197 summary_df <- results_df
197 minimumy = min(coord(msidata.cv.pls)[,2]) 198
198 maximumy = max(coord(msidata.cv.pls)[,2]) 199 ## new table and plot of accuracies over all components
199 image(msidata.cv.pls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(1, 1), col=colourvector) 200 summary.cv.pls = as.data.frame(summary(msidata.cv.pls))
201 plot(0,type='n',axes=FALSE,ann=FALSE)
202 summary.cv.pls.round <- round(summary.cv.pls, digits=2)
203 grid.table(summary.cv.pls.round, rows=NULL)
204
205 accuracy_plot = ggplot(summary.cv.pls, aes(x = ncomp, y = Accuracy)) +
206 geom_point(color = "blue", size = 3) + # Add points
207 geom_line() +
208 theme_bw()
209 print(accuracy_plot)
200 210
201 ## print table with summary in pdf 211 ## print table with summary in pdf
202 par(opar) 212 par(opar)
203 plot(0,type='n',axes=FALSE,ann=FALSE) 213 plot(0,type='n',axes=FALSE,ann=FALSE)
204 title(main="Summary for the different components\n", adj=0.5) 214 title(main="Summary for the different components\n", adj=0.5)
214 if (maxcount <= nrow(summary_df)){ 224 if (maxcount <= nrow(summary_df)){
215 grid.table(summary_df[mincount:maxcount,], rows= NULL) 225 grid.table(summary_df[mincount:maxcount,], rows= NULL)
216 mincount = mincount+20 226 mincount = mincount+20
217 maxcount = maxcount+20 227 maxcount = maxcount+20
218 }else{### stop last page with last sample otherwise NA in table 228 }else{### stop last page with last sample otherwise NA in table
219 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} 229 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)}
220 } 230 }
221 } 231 }
222 232
223 ## optional output as .RData 233 ## optional output as .RData
224 #if $output_rdata: 234 #if $output_rdata:
247 ## in case user used multiple inputs for component - this is only possible in cv apply 257 ## in case user used multiple inputs for component - this is only possible in cv apply
248 message("Error during PLS training") 258 message("Error during PLS training")
249 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for PLS analysis or component was set to 0 but minimum for component is 1)") 259 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for PLS analysis or component was set to 0 but minimum for component is 1)")
250 stop(call.=FALSE) 260 stop(call.=FALSE)
251 } 261 }
252 ) 262 )
253 263
254 ### pls analysis and coefficients plot 264 ### pls analysis and coefficients plot
255 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale) 265 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale)
256 plot(msidata.pls, main="PLS coefficients per m/z", col=colourvector) 266 plot(msidata.pls, main="PLS coefficients per m/z", col=colourvector)
257 267
258 ### summary table of PLS 268
259 summary_table = summary(msidata.pls)\$accuracy[[paste0("ncomp = ",component)]] 269 ## create new summary table
260 summary_table2 = round(as.numeric(summary_table), digits=2) 270 summary_df = as.data.frame(summary(msidata.pls))
261 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) 271 colnames(summary_df) = c("Number of Components", "Accuracy", "Sensitivity", "Specificity")
262 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table 272 summary_df = round(summary_df, digits = 2)
263 summary_table4 = t(summary_table3)
264 summary_table5 = cbind(c(paste0("ncomp = ", component), colnames(summary_table)), summary_table4)
265 plot(0,type='n',axes=FALSE,ann=FALSE) 273 plot(0,type='n',axes=FALSE,ann=FALSE)
266 grid.table(summary_table5, rows= NULL) 274 grid.table(summary_df, rows= NULL)
267 275
268 ### image of the best m/z 276 ## Yweights plot: represent the importance of each response variable in predicting each component
269 minimumy = min(coord(msidata)[,2]) 277
270 maximumy = max(coord(msidata)[,2]) 278 #if $type_cond.method_cond.analysis_cond.PLS_Yweights == "TRUE":
271 print(image(msidata, mz = topFeatures(msidata.pls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), smooth.image="gaussian", main="best m/z heatmap")) 279 Yweights = as.data.frame(msidata.pls@resultData@listData[[1]][["Yweights"]])
272 280 Yweights = round(Yweights, digits = 4)
273 ### m/z and pixel information output 281 Yweights.class <- cbind("class" = rownames(Yweights), Yweights)
274 pls_classes = data.frame(msidata.pls\$classes[[1]]) 282
283 plot(0,type='n',axes=FALSE,ann=FALSE)
284 text(x = 0.95, y = 1, "Yweights", cex = 2, font = 2)
285 grid.table(Yweights.class, rows= NULL)
286
287 #end if
288
289 coefficient_plot = plot(msidata.pls, values="coefficients", lwd=2, main = "PLS coefficients per m/z")
290 print(coefficient_plot)
291
292 ## m/z and pixel information output
293 pls_classes = data.frame(msidata.pls@resultData@listData[[1]][["class"]])
294
275 ## pixel names and coordinates 295 ## pixel names and coordinates
276 ## to remove potential sample names and z dimension, split at comma and take only x and y 296 x_coords = msidata_coordinates@listData[["x"]]
277 x_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 1)) 297 y_coords = msidata_coordinates@listData[["y"]]
278 y_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 2)) 298 pixel_names = paste0("xy_", x_coords, "_", y_coords)
279 x_coordinates = gsub("x = ","",x_coords)
280 y_coordinates = gsub(" y = ","",y_coords)
281 pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates)
282 299
283 ## remove msidata to clean up RAM space 300 ## remove msidata to clean up RAM space
284 rm(msidata) 301 rm(msidata)
285 gc() 302 gc()
286 pls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, pls_classes) 303
287 colnames(pls_classes2) = c("pixel names", "x", "y","predicted condition") 304 pls_classes2 = data.frame(pixel_names, x_coords, y_coords, pls_classes, y_vector)
288 pls_toplabels = topFeatures(msidata.pls, n=Inf) 305 colnames(pls_classes2) = c("pixel_name", "x", "y","predicted_class", "annotated_class")
289 pls_toplabels[,4:6] <-round(pls_toplabels[,4:6],6) 306 pls_classes2\$correct <- ifelse(pls_classes2\$predicted_class==pls_classes2\$annotated_class, T, F)
290 write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 307
291 write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 308 write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
292 309
310 correctness = round(sum(pls_classes2\$correct)/length(pls_classes2\$correct)*100,2)
311
312 ## replace topFeatures table with coefficients table
313 coefficients.df = as.data.frame(msidata.pls@resultData@listData[[1]][["coefficients"]])
314 row_names <- msidata.pls@featureData@mz
315 coefficients.df.rownames <- cbind("mz" = row_names, coefficients.df)
316 write.table(coefficients.df.rownames, file = "$coefficients", quote = FALSE, sep = "\t", row.names = FALSE)
317
318 ## add loadings and weights table
319 loadings.df = as.data.frame(msidata.pls@resultData@listData[[1]][["loadings"]])
320 loadings.df <- cbind("mz" = row_names, loadings.df)
321 new_names <- paste0("loadings_", names(loadings.df)[-1])
322 names(loadings.df)[-1] <- new_names
323
324 weights.df = as.data.frame(msidata.pls@resultData@listData[[1]][["weights"]])
325 weights.df <- cbind("mz" = row_names, weights.df)
326 new_names <- paste0("weights_", names(weights.df)[-1])
327 names(weights.df)[-1] <- new_names
328
329 ## combine loading and weights table
330 merged.load.wei = merge(loadings.df, weights.df, by = "mz")
331 write.table(merged.load.wei, file = "$loadings_weights", quote = FALSE, sep = "\t", row.names = FALSE)
332
293 ## image with predicted classes 333 ## image with predicted classes
294 prediction_df = cbind(coord(msidata.pls)[,1:2], pls_classes) 334 prediction_df = as.data.frame(cbind(coord(msidata.pls)[,1:2], pls_classes))
295 colnames(prediction_df) = c("x", "y", "predicted_classes") 335 colnames(prediction_df) = c("x", "y", "predicted_classes")
296 336
297 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ 337 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+
298 geom_tile() + 338 geom_tile() +
299 coord_fixed()+ 339 coord_fixed()+
300 ggtitle("Predicted condition for each pixel")+ 340 ggtitle("Predicted condition for each pixel")+
301 theme_bw()+ 341 theme_bw()+
302 theme( 342 theme(
303 plot.background = element_blank(), 343 plot.background = element_blank(),
304 panel.grid.major = element_blank(), 344 panel.grid.major = element_blank(),
305 panel.grid.minor = element_blank())+ 345 panel.grid.minor = element_blank())+
306 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 346 theme(text=element_text(family="ArialMT", face="bold", size=15))+
307 theme(legend.position="bottom",legend.direction="vertical")+ 347 theme(legend.position="bottom",legend.direction="vertical")+
308 guides(fill=guide_legend(ncol=4,byrow=TRUE))+ 348 guides(fill=guide_legend(ncol=4,byrow=TRUE))+
309 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) 349 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector)
310 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") 350 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass")
311 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) 351 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
312 print(prediction_plot) 352 print(prediction_plot)
353
354 ## correctness plot
355 correctness_plot = ggplot(pls_classes2, aes(x=x, y=y, fill=correct))+
356 geom_tile() +
357 coord_fixed()+
358 ggtitle(paste0("Correctness of classification: ", correctness, " %"))+
359 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+
360 theme_bw()+
361 theme(
362 plot.background = element_blank(),
363 panel.grid.major = element_blank(),
364 panel.grid.minor = element_blank())+
365 theme(text=element_text(family="ArialMT", face="bold", size=15))+
366 theme(legend.position="bottom",legend.direction="vertical")+
367 guides(fill=guide_legend(ncol=2,byrow=TRUE))
368 coord_labels = aggregate(cbind(x,y)~correct, data=pls_classes2, mean, na.rm=TRUE, na.action="na.pass")
369 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
370 print(correctness_plot)
371
313 372
314 ### optional output as .RData 373 ### optional output as .RData
315 #if $output_rdata: 374 #if $output_rdata:
316 save(msidata.pls, file="$classification_rdata") 375 save(msidata.pls, file="$classification_rdata")
317 #end if 376 #end if
329 ## set variables for components and number of response groups 388 ## set variables for components and number of response groups
330 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp) 389 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp)
331 number_groups = length(levels(y_vector)) 390 number_groups = length(levels(y_vector))
332 391
333 ## OPLS-cvApply: 392 ## OPLS-cvApply:
334 msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components) 393 msidata.cv.opls <- crossValidate(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components)
335 ## for use to reduce msidata: keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv
336 394
337 ## remove msidata to clean up RAM space 395 ## remove msidata to clean up RAM space
338 rm(msidata) 396 rm(msidata)
339 gc() 397 gc()
340 398
341 ## create table with summary 399
342 count = 1 400
343 summary_oplscv = list() 401 ## new table with cv results to replace the old summary table
344 accuracy_vector = numeric() 402 results_list <- NULL
345 for (iteration in components){ 403 for (i in seq_along(components)) {
346 404 ## extract accuracy, sensitivity, and specificity for the current i
347 summary_iteration = summary(msidata.cv.opls)\$accuracy[[paste0("ncomp = ", iteration)]] 405 accuracy <- round(as.data.frame(msidata.cv.opls@resultData@listData[[i]][["accuracy"]]), digits=2)
348 ## change class of numbers into numeric to round and calculate mean 406 sensitivity <- round(as.data.frame(msidata.cv.opls@resultData@listData[[i]][["sensitivity"]]), digits=2)
349 summary_iteration2 = round(as.numeric(summary_iteration), digits=2) 407 specificity <- round(as.data.frame(msidata.cv.opls@resultData@listData[[i]][["specificity"]]), digits=2)
350 summary_matrix = matrix(summary_iteration2, nrow=4, ncol=number_groups) 408
351 accuracy_vector[count] = mean(summary_matrix[1,]) ## vector with accuracies to find later maximum for plot 409 ## combine accuracy, sensitivity, and specificity into one data frame
352 summary_iteration3 = cbind(rownames(summary_iteration), summary_matrix) ## include rownames in table 410 result_df <- cbind(folds = rownames(accuracy), ncomp = i, accuracy, sensitivity, specificity)
353 summary_iteration4 = t(summary_iteration3) 411 colnames(result_df) <- c("folds", "ncomp", "accuracy", "sensitivity", "specificity")
354 summary_iteration5 = cbind(c(paste0("ncomp = ", iteration), colnames(summary_iteration)), summary_iteration4) 412 rownames(result_df) <- NULL
355 summary_oplscv[[count]] = summary_iteration5 413
356 count = count+1} ## create list with summary table for each component 414 ## add column names with ncomp as first row to each dataframe
357 summary_oplscv = do.call(rbind, summary_oplscv) 415 col_names_row <- data.frame(folds = "folds", ncomp = paste0("ncomp", i), accuracy = "accuracy", sensitivity = "sensitivity", specificity = "specificity")
358 summary_df = as.data.frame(summary_oplscv) 416 result_df <- rbind(col_names_row, result_df)
359 colnames(summary_df) = NULL 417
360 418 results_list[[i]] <- result_df
361 ## plots 419 }
362 ## plot to find ncomp with highest accuracy 420
363 plot(components, accuracy_vector, ylab = "mean accuracy", type="o", main="Mean accuracy of OPLS classification") 421 ## combine all data frames in the list into one data frame
364 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy 422 results_df <- do.call(rbind, results_list)
365 ## one image for each sample/fold, 4 images per page 423
366 minimumy = min(coord(msidata.cv.opls)[,2]) 424 summary_df <- results_df
367 maximumy = max(coord(msidata.cv.opls)[,2]) 425
368 image(msidata.cv.opls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(1, 1), col=colourvector) 426 ## new table and plot of accuracies over all components
427 summary.cv.opls = as.data.frame(summary(msidata.cv.opls))
428
429 ## table with values
430 plot(0,type='n',axes=FALSE,ann=FALSE)
431 summary.cv.opls.round <- round(summary.cv.opls, digits=2)
432 grid.table(summary.cv.opls.round, rows=NULL)
433
434 accuracy_plot = ggplot(summary.cv.opls, aes(x = ncomp, y = Accuracy)) +
435 geom_point(color = "blue", size = 3) + # Add points
436 geom_line() +
437 theme_bw()
438 print(accuracy_plot)
369 439
370 ## print table with summary in pdf 440 ## print table with summary in pdf
371 par(opar) 441 par(opar)
372 plot(0,type='n',axes=FALSE,ann=FALSE) 442 plot(0,type='n',axes=FALSE,ann=FALSE)
373 title(main="Summary for the different components\n", adj=0.5) 443 title(main="Summary for the different components\n", adj=0.5)
383 if (maxcount <= nrow(summary_df)){ 453 if (maxcount <= nrow(summary_df)){
384 grid.table(summary_df[mincount:maxcount,], rows= NULL) 454 grid.table(summary_df[mincount:maxcount,], rows= NULL)
385 mincount = mincount+20 455 mincount = mincount+20
386 maxcount = maxcount+20 456 maxcount = maxcount+20
387 }else{### stop last page with last sample otherwise NA in table 457 }else{### stop last page with last sample otherwise NA in table
388 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} 458 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)}
389 } 459 }
390 } 460 }
391 461
392 ## optional output as .RData 462 ## optional output as .RData
393 #if $output_rdata: 463 #if $output_rdata:
416 ## in case user used multiple inputs for component - this is only possible in cv apply 486 ## in case user used multiple inputs for component - this is only possible in cv apply
417 message("Error during OPLS training") 487 message("Error during OPLS training")
418 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for OPLS analysis or component was set to 0 but minimum for component is 1)") 488 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for OPLS analysis or component was set to 0 but minimum for component is 1)")
419 stop(call.=FALSE) 489 stop(call.=FALSE)
420 } 490 }
421 ) 491 )
422 492
423 ### opls analysis and coefficients plot 493 ### opls analysis and coefficients plot
424 msidata.opls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale) 494 msidata.opls <- OPLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale)
425 ## to reduce msidata: keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew
426 plot(msidata.opls, main="OPLS coefficients per m/z", col=colourvector) 495 plot(msidata.opls, main="OPLS coefficients per m/z", col=colourvector)
427 496
428 497 ## create new summary table
429 ### summary table of OPLS 498 summary_df = as.data.frame(summary(msidata.opls))
430 summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]] 499 colnames(summary_df) = c("Number of Components", "Accuracy", "Sensitivity", "Specificity")
431 summary_table2 = round(as.numeric(summary_table), digits=2) 500 summary_df = round(summary_df, digits = 2)
432 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups)
433 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table
434 summary_table4 = t(summary_table3)
435 summary_table5 = cbind(c(paste0("ncomp = ", component), colnames(summary_table)), summary_table4)
436 plot(0,type='n',axes=FALSE,ann=FALSE) 501 plot(0,type='n',axes=FALSE,ann=FALSE)
437 grid.table(summary_table5, rows= NULL) 502 grid.table(summary_df, rows= NULL)
438 503
439 ### image of the best m/z 504
440 minimumy = min(coord(msidata)[,2]) 505 #if $type_cond.method_cond.opls_analysis_cond.OPLS_Yweights == "TRUE":
441 maximumy = max(coord(msidata)[,2]) 506 ## Yweights plot: represent the importance of each response variable in predicting each component
442 print(image(msidata, mz = topFeatures(msidata.opls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) 507 Yweights = as.data.frame(msidata.opls@resultData@listData[[1]][["Yweights"]])
443 508 Yweights = round(Yweights, digits = 4)
444 opls_classes = data.frame(msidata.opls\$classes[[1]]) 509 Yweights.class <- cbind("class" = rownames(Yweights), Yweights)
510
511 plot(0,type='n',axes=FALSE,ann=FALSE)
512 text(x = 0.95, y = 1, "Yweights", cex = 2, font = 2)
513 grid.table(Yweights.class, rows= NULL)
514 #end if
515
516 coefficient_plot = plot(msidata.opls, values="coefficients", lwd=2, main = "OPLS coefficients per m/z")
517 print(coefficient_plot)
518
519 ## m/z and pixel information output
520 opls_classes = data.frame(msidata.opls@resultData@listData[[1]][["class"]])
521
445 ## pixel names and coordinates 522 ## pixel names and coordinates
446 ## to remove potential sample names and z dimension, split at comma and take only x and y 523 x_coords = msidata_coordinates@listData[["x"]]
447 x_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 1)) 524 y_coords = msidata_coordinates@listData[["y"]]
448 y_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 2)) 525 pixel_names = paste0("xy_", x_coords, "_", y_coords)
449 x_coordinates = gsub("x = ","",x_coords) 526
450 y_coordinates = gsub(" y = ","",y_coords) 527 opls_classes2 = data.frame(pixel_names, x_coords, y_coords, opls_classes, y_vector)
451 pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates) 528 colnames(opls_classes2) = c("pixel names", "x", "y","predicted_class", "annotated_class")
452 529 opls_classes2\$correct <- ifelse(opls_classes2\$predicted_class == opls_classes2\$annotated_class, T, F)
453 opls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, opls_classes) 530
454 colnames(opls_classes2) = c("pixel names", "x", "y","predicted condition") 531 write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
532
533 correctness = round(sum(opls_classes2\$correct)/length(opls_classes2\$correct)*100,2)
455 534
456 ## remove msidata to clean up RAM space 535 ## remove msidata to clean up RAM space
457 rm(msidata) 536 rm(msidata)
458 gc() 537 gc()
459 538
460 opls_toplabels = topFeatures(msidata.opls, n=Inf) 539 ## replace topFeatures table with coefficients table
461 opls_toplabels[,4:6] <-round(opls_toplabels[,4:6],6) 540 coefficients.df = as.data.frame(msidata.opls@resultData@listData[[1]][["coefficients"]])
462 write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 541 row_names <- msidata.opls@featureData@mz
463 write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 542 coefficients.df.rownames <- cbind("mz" = row_names, coefficients.df)
543 write.table(coefficients.df.rownames, file = "$coefficients", quote = FALSE, sep = "\t", row.names = FALSE)
544
545 ## add loadings and weights table
546 loadings.df = as.data.frame(msidata.opls@resultData@listData[[1]][["loadings"]])
547 loadings.df <- cbind("mz" = row_names, loadings.df)
548 new_names <- paste0("loadings_", names(loadings.df)[-1])
549 names(loadings.df)[-1] <- new_names
550
551 weights.df = as.data.frame(msidata.opls@resultData@listData[[1]][["weights"]])
552 weights.df <- cbind("mz" = row_names, weights.df)
553 new_names <- paste0("weights_", names(weights.df)[-1])
554 names(weights.df)[-1] <- new_names
555
556 ## combine loading and weights table
557 merged.load.wei = merge(loadings.df, weights.df, by = "mz")
558 write.table(merged.load.wei, file = "$loadings_weights", quote = FALSE, sep = "\t", row.names = FALSE)
464 559
465 ## image with predicted classes 560 ## image with predicted classes
466 prediction_df = cbind(coord(msidata.opls)[,1:2], opls_classes) 561 prediction_df = as.data.frame(cbind(coord(msidata.opls)[,1:2], opls_classes))
467 colnames(prediction_df) = c("x", "y", "predicted_classes") 562 colnames(prediction_df) = c("x", "y", "predicted_classes")
468 563
469 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ 564 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+
470 geom_tile() + 565 geom_tile() +
471 coord_fixed()+ 566 coord_fixed()+
472 ggtitle("Predicted condition for each pixel")+ 567 ggtitle("Predicted condition for each pixel")+
473 theme_bw()+ 568 theme_bw()+
474 theme( 569 theme(
475 plot.background = element_blank(), 570 plot.background = element_blank(),
476 panel.grid.major = element_blank(), 571 panel.grid.major = element_blank(),
477 panel.grid.minor = element_blank())+ 572 panel.grid.minor = element_blank())+
478 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 573 theme(text=element_text(family="ArialMT", face="bold", size=15))+
479 theme(legend.position="bottom",legend.direction="vertical")+ 574 theme(legend.position="bottom",legend.direction="vertical")+
480 guides(fill=guide_legend(ncol=4,byrow=TRUE))+ 575 guides(fill=guide_legend(ncol=4,byrow=TRUE))+
481 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) 576 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector)
482 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") 577 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass")
483 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) 578 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
484 print(prediction_plot) 579 print(prediction_plot)
485 580
486 ## optional output as .RData 581 ## correctness plot
487 #if $output_rdata: 582 correctness_plot = ggplot(opls_classes2, aes(x=x, y=y, fill=correct))+
583 geom_tile() +
584 coord_fixed()+
585 ggtitle(paste0("Correctness of classification: ", correctness, " %"))+
586 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+
587 theme_bw()+
588 theme(
589 plot.background = element_blank(),
590 panel.grid.major = element_blank(),
591 panel.grid.minor = element_blank())+
592 theme(text=element_text(family="ArialMT", face="bold", size=15))+
593 theme(legend.position="bottom",legend.direction="vertical")+
594 guides(fill=guide_legend(ncol=2,byrow=TRUE))
595 coord_labels = aggregate(cbind(x,y)~correct, data=opls_classes2, mean, na.rm=TRUE, na.action="na.pass")
596 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
597 print(correctness_plot)
598
599
600 ## optional output as .RData
601 #if $output_rdata:
488 save(msidata.opls, file="$classification_rdata") 602 save(msidata.opls, file="$classification_rdata")
489 #end if 603 #end if
490 #end if 604 #end if
491 605
492 606
493 ######################## SSC ############################# 607 ######################## SSC #############################
494 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids": 608 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids":
500 614
501 ## set variables for components and number of response groups 615 ## set variables for components and number of response groups
502 number_groups = length(levels(y_vector)) 616 number_groups = length(levels(y_vector))
503 617
504 ## SSC-cvApply: 618 ## SSC-cvApply:
505 msidata.cv.ssc <- cvApply(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") 619 msidata.cv.ssc <- crossValidate(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method")
620
506 621
507 ## remove msidata to clean up RAM space 622 ## remove msidata to clean up RAM space
508 rm(msidata) 623 rm(msidata)
509 gc() 624 gc()
510 625
511 ## create table with summary 626 ## new table and plot of accuracies over all components
512 count = 1 627 summary.cv.ssc = as.data.frame(summary(msidata.cv.ssc))
513 summary_ssccv = list() 628 summary.cv.ssc.round <- round(summary.cv.ssc, digits=2)
514 accuracy_vector = numeric() 629
515 iteration_vector = character()
516 for (iteration in names(msidata.cv.ssc@resultData[[1]][,1])){
517
518 summary_iteration = summary(msidata.cv.ssc)\$accuracy[[iteration]]
519 ## change class of numbers into numeric to round and calculate mean
520 summary_iteration2 = round(as.numeric(summary_iteration), digits=2)
521 summary_matrix = matrix(summary_iteration2, nrow=4, ncol=number_groups)
522 accuracy_vector[count] = mean(summary_matrix[1,]) ## vector with accuracies to find later maximum for plot
523 summary_iteration3 = cbind(rownames(summary_iteration), summary_matrix) ## include rownames in table
524 summary_iteration4 = t(summary_iteration3)
525 summary_iteration5 = cbind(c(iteration, colnames(summary_iteration)), summary_iteration4)
526 summary_ssccv[[count]] = summary_iteration5
527 iteration_vector[count] = unlist(strsplit(iteration, "[,]"))[3]
528 count = count+1} ## create list with summary table for each component
529 summary_ssccv = do.call(rbind, summary_ssccv)
530 summary_df = as.data.frame(summary_ssccv)
531 colnames(summary_df) = NULL
532
533 ## plot to find parameters with highest accuracy
534 plot(c($type_cond.method_cond.ssc_s),accuracy_vector[!duplicated(iteration_vector)], type="o",ylab="Mean accuracy", xlab = "Shrinkage parameter (s)", main="Mean accuracy of SSC classification")
535 best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy
536 r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4))
537 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space
538 minimumy = min(coord(msidata.cv.ssc)[,2])
539 maximumy = max(coord(msidata.cv.ssc)[,2])
540 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(1,1), col=colourvector)
541
542 #if $type_cond.method_cond.ssc_analysis_cond.write_best_params:
543 write.table(r_value, file="$best_r", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
544 write.table(s_value, file="$best_s", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
545 #end if
546
547 ## print table with summary in pdf
548 par(opar) 630 par(opar)
549 plot(0,type='n',axes=FALSE,ann=FALSE) 631 plot(0,type='n',axes=FALSE,ann=FALSE)
550 title(main="Summary for the different parameters\n", adj=0.5) 632 title(main="Summary for the different parameters\n", adj=0.5)
633 ## 20 rows fits in one page:
634 if (nrow(summary.cv.ssc.round)<=20){
635 grid.table(summary.cv.ssc.round, rows= NULL)
636 }else{
637 grid.table(summary.cv.ssc.round[1:20,], rows= NULL)
638 mincount = 21
639 maxcount = 40
640 for (count20 in 1:(ceiling(nrow(summary.cv.ssc.round)/20)-1)){
641 plot(0,type='n',axes=FALSE,ann=FALSE)
642 if (maxcount <= nrow(summary.cv.ssc.round)){
643 grid.table(summary.cv.ssc.round[mincount:maxcount,], rows= NULL)
644 mincount = mincount+20
645 maxcount = maxcount+20
646 }else{### stop last page with last sample otherwise NA in table
647 grid.table(summary.cv.ssc.round[mincount:nrow(summary.cv.ssc.round),], rows= NULL)}
648 }
649 }
650
651 ## new accuracy plots
652 #if $type_cond.method_cond.ssc_analysis_cond.ssc_cv_accuracy_plot == "TRUE":
653 accuracy_plot = ggplot(summary.cv.ssc, aes(x = s, y = Accuracy)) +
654 geom_point(color = "blue", size = 3) + # Add points
655 geom_line() +
656 theme_bw() +
657 facet_wrap(~ r)
658
659 print(accuracy_plot)
660
661 ## or as alternative accuracy plot for each r value on own page:
662 #elif $type_cond.method_cond.ssc_analysis_cond.ssc_cv_accuracy_plot == "FALSE":
663 unique_r_values <- unique(summary.cv.ssc\$r)
664
665 for (r_value in unique_r_values) {
666 ## Create a subset for the current value of r
667 plot_data <- subset(summary.cv.ssc, r == r_value)
668 ## Create the accuracy plot for the current value of r
669 accuracy_plot <- ggplot(plot_data, aes(x = s, y = Accuracy)) +
670 geom_point(color = "blue", size = 3) + # Add points
671 geom_line() +
672 theme_bw() +
673 ggtitle(paste("Plot for r =", r_value)) + # Add a title
674 theme(plot.title = element_text(hjust = 0.5)) # Center the title
675 print(accuracy_plot)
676 }
677 #end if
678
679 ## table with cv values per fold group for each combination of r and s
680 r_s_df = as.data.frame(msidata.cv.ssc@modelData@listData)
681 r_s_df\$parameter = paste0("r=", r_s_df\$r, " and s=", r_s_df\$s)
682 iteration = seq_along(r_s_df\$parameter)
683
684 results_list <- NULL
685 for (i in iteration) {
686 ## extract accuracy, sensitivity, and specificity for the current i
687 accuracy <- round(as.data.frame(msidata.cv.ssc@resultData@listData[[i]][["accuracy"]]), digits=2)
688 sensitivity <- round(as.data.frame(msidata.cv.ssc@resultData@listData[[i]][["sensitivity"]]), digits=2)
689 specificity <- round(as.data.frame(msidata.cv.ssc@resultData@listData[[i]][["specificity"]]), digits=2)
690
691 ## combine accuracy, sensitivity, and specificity into one data frame
692 result_df <- cbind(folds = rownames(accuracy), parameter = r_s_df\$parameter[i], accuracy, sensitivity, specificity)
693 colnames(result_df) <- c("folds", "parameter", "accuracy", "sensitivity", "specificity")
694 rownames(result_df) <- NULL
695
696 ## add column names as first row to each dataframe
697 col_names_row <- data.frame(folds = "folds", parameter = "parameter", accuracy = "accuracy", sensitivity = "sensitivity", specificity = "specificity")
698 result_df <- rbind(col_names_row, result_df)
699
700 results_list[[i]] <- result_df
701 }
702
703 ## combine all data frames in the list into one data frame
704 results_df <- do.call(rbind, results_list)
705 summary_df <- results_df
706
707 par(opar)
708 plot(0,type='n',axes=FALSE,ann=FALSE)
709 title(main="More advanced folds output table: \n Summary for each fold\n", adj=0.5)
551 ## 20 rows fits in one page: 710 ## 20 rows fits in one page:
552 if (nrow(summary_df)<=20){ 711 if (nrow(summary_df)<=20){
553 grid.table(summary_df, rows= NULL) 712 grid.table(summary_df, rows= NULL)
554 }else{ 713 }else{
555 grid.table(summary_df[1:20,], rows= NULL) 714 grid.table(summary_df[1:20,], rows= NULL)
560 if (maxcount <= nrow(summary_df)){ 719 if (maxcount <= nrow(summary_df)){
561 grid.table(summary_df[mincount:maxcount,], rows= NULL) 720 grid.table(summary_df[mincount:maxcount,], rows= NULL)
562 mincount = mincount+20 721 mincount = mincount+20
563 maxcount = maxcount+20 722 maxcount = maxcount+20
564 }else{### stop last page with last sample otherwise NA in table 723 }else{### stop last page with last sample otherwise NA in table
565 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} 724 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)}
566 } 725 }
567 } 726 }
727
728
729 ## new code to extract best r and s values
730 max_accuracy_index <- which.max(summary.cv.ssc\$Accuracy)
731
732 ## extract the corresponding values of "r" and "s"
733 highest_accuracy_r <- summary.cv.ssc\$r[max_accuracy_index]
734 highest_accuracy_s <- summary.cv.ssc\$s[max_accuracy_index]
735
736 #if $type_cond.method_cond.ssc_analysis_cond.write_best_params:
737 write.table(highest_accuracy_r, file="$best_r", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
738 write.table(highest_accuracy_s, file="$best_s", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
739 #end if
568 740
569 ## optional output as .RData 741 ## optional output as .RData
570 #if $output_rdata: 742 #if $output_rdata:
571 save(msidata.cv.ssc, file="$classification_rdata") 743 save(msidata.cv.ssc, file="$classification_rdata")
572 #end if 744 #end if
577 749
578 ## set variables for components and number of response groups 750 ## set variables for components and number of response groups
579 number_groups = length(levels(y_vector)) 751 number_groups = length(levels(y_vector))
580 752
581 ## SSC analysis and plot 753 ## SSC analysis and plot
582 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, 754 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method")
583 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") 755 print(plot(msidata.ssc, values = "statistic", model = list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), col=colourvector, lwd=2))
584 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s)),
585 col=colourvector, lwd=2)
586
587 ### summary table SSC
588 ##############summary_table = summary(msidata.ssc)
589 756
590 ### stop if multiple values for r and s were used as input 757 ### stop if multiple values for r and s were used as input
591 tryCatch( 758 tryCatch(
592 { 759 {
593 760
600 ## in case user used multiple inputs for r or s stop - this is only possible in cv apply 767 ## in case user used multiple inputs for r or s stop - this is only possible in cv apply
601 message("Error during SSC training") 768 message("Error during SSC training")
602 message("Possible problem: multiple values for r or s selected - this is only possible in cvapply but not for spatial shrunken centroid analysis)") 769 message("Possible problem: multiple values for r or s selected - this is only possible in cvapply but not for spatial shrunken centroid analysis)")
603 stop(call.=FALSE) 770 stop(call.=FALSE)
604 } 771 }
605 ) 772 )
606 773
607 summary_table = summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]] 774 summary_df = as.data.frame(summary(msidata.ssc))
608 summary_table2 = round(as.numeric(summary_table), digits=2) 775 summary_df = round(summary_df, digits=3)
609 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) 776 colnames(summary_df) = c("Radius r", "Shrinkage s", "Features/Class", "Accuracy", "Sensitivity", "Specificity")
610 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table
611 summary_table4 = t(summary_table3)
612 summary_table5 = cbind(c(names(msidata.ssc@resultData),colnames(summary_table)), summary_table4)
613 plot(0,type='n',axes=FALSE,ann=FALSE) 777 plot(0,type='n',axes=FALSE,ann=FALSE)
614 grid.table(summary_table5, rows= NULL) 778 grid.table(summary_df, rows= NULL)
615 779
616 ### image of the best m/z 780 ## image of the best m/z
617 minimumy = min(coord(msidata)[,2]) 781 minimumy = min(coord(msidata)[,2])
618 maximumy = max(coord(msidata)[,2]) 782 maximumy = max(coord(msidata)[,2])
619 print(image(msidata, mz = topFeatures(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) 783 print(image(msidata, mz = topFeatures(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap"))
620 784
621 ## m/z and pixel information output 785 ## m/z and pixel information output
622 ssc_classes = data.frame(msidata.ssc\$classes[[1]]) 786 x_coords = msidata_coordinates@listData[["x"]]
623 ssc_probabilities = data.frame(msidata.ssc\$probabilities[[1]]) 787 y_coords = msidata_coordinates@listData[["y"]]
624 788 pixel_names = paste0("xy_", x_coords, "_", y_coords)
625 ## pixel names and coordinates
626 ## to remove potential sample names and z dimension, split at comma and take only x and y
627 x_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 1))
628 y_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 2))
629 x_coordinates = gsub("x = ","",x_coords)
630 y_coordinates = gsub(" y = ","",y_coords)
631 pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates)
632 789
633 790
634 ## remove msidata to clean up RAM space 791 ## remove msidata to clean up RAM space
635 rm(msidata) 792 rm(msidata)
636 gc() 793 gc()
637 794
638 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes, ssc_probabilities) 795
639 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition", levels(msidata.ssc\$classes[[1]])) 796 ## toplabel (m/z features output)
640 ssc_toplabels = topFeatures(msidata.ssc, n=Inf) 797 ssc_toplabels = topFeatures(msidata.ssc, n=$type_cond.method_cond.ssc_toplabels)
641 ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6) 798 ssc_toplabels@listData[["centers"]] = round (ssc_toplabels@listData[["centers"]], digits = 6)
799 ssc_toplabels@listData[["statistic"]] = round (ssc_toplabels@listData[["statistic"]], digits = 6)
642 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 800 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
801
802 print(image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector, values="class", layout=c(1,1), main="Class Prediction"))
803 print(image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector, values="probability", layout=c(1,1), main="Class Probabilities"))
804
805
806 ## pixel output with correctness
807 ssc_classes = data.frame(msidata.ssc@resultData@listData[[1]][["class"]])
808 colnames(ssc_classes) = "predicted_class"
809 ssc_classes\$predicted_class = ifelse(is.na(ssc_classes\$predicted_class), "NA", as.character(ssc_classes\$predicted_class))
810 ssc_probabilities = data.frame(msidata.ssc@resultData@listData[[1]][["probability"]])
811
812
813 ssc_classes2 = data.frame(pixel_names, x_coords, y_coords, ssc_classes, ssc_probabilities, y_vector)
814 colnames(ssc_classes2) = c("pixel_names", "x", "y","predicted_classes", levels(msidata.ssc@resultData@listData[[1]][["class"]]), "annotated_class")
815 ssc_classes2\$correct<- ifelse(ssc_classes2\$predicted_classes==ssc_classes2\$annotated_class, T, F)
816 correctness = round(sum(ssc_classes2\$correct, na.rm = TRUE)/length(ssc_classes2\$correct)*100,2)
817
643 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 818 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
644 819
645 image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), 820 correctness_plot = ggplot(ssc_classes2, aes(x=x, y=y, fill=correct))+
646 col=colourvector, mode="classes", layout=c(1,1), main="Class Prediction")
647 image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),
648 col=colourvector, mode="probabilities", layout=c(1,1), main="Class probabilities")
649
650
651 ## image with right and wrong classes:
652 prediction_df = cbind(coord(msidata.ssc)[,1:2], ssc_classes)
653 colnames(prediction_df) = c("x", "y", "predicted_classes")
654 comparison_df = cbind(prediction_df, y_vector)
655 comparison_df\$correct<- ifelse(comparison_df\$predicted_classes==comparison_df\$y_vector, T, F)
656 correctness = round(sum(comparison_df\$correct)/length(comparison_df\$correct)*100,2)
657
658 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+
659 geom_tile() + 821 geom_tile() +
660 coord_fixed()+ 822 coord_fixed()+
661 ggtitle(paste0("Correctness of classification: ",correctness, "%"))+ 823 ggtitle(paste0("Correctness of classification: ", correctness, " %"))+
662 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ 824 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+
663 theme_bw()+ 825 theme_bw()+
664 theme( 826 theme(
665 plot.background = element_blank(), 827 plot.background = element_blank(),
666 panel.grid.major = element_blank(), 828 panel.grid.major = element_blank(),
667 panel.grid.minor = element_blank())+ 829 panel.grid.minor = element_blank())+
668 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 830 theme(text=element_text(family="ArialMT", face="bold", size=15))+
669 theme(legend.position="bottom",legend.direction="vertical")+ 831 theme(legend.position="bottom",legend.direction="vertical")+
670 guides(fill=guide_legend(ncol=2,byrow=TRUE)) 832 guides(fill=guide_legend(ncol=2,byrow=TRUE))
671 ## coord_labels = aggregate(cbind(x,y)~correct, data=comparison_df, mean, na.rm=TRUE, na.action="na.pass") 833 coord_labels = aggregate(cbind(x,y)~correct, data=ssc_classes2, mean, na.rm=TRUE, na.action="na.pass")
672 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) 834 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
673 print(correctness_plot) 835 print(correctness_plot)
836
674 837
675 ## optional output as .RData 838 ## optional output as .RData
676 #if $output_rdata: 839 #if $output_rdata:
677 save(msidata.ssc, file="$classification_rdata") 840 save(msidata.ssc, file="$classification_rdata")
678 #end if 841 #end if
679
680 #end if 842 #end if
681 #end if 843 #end if
682 844
683 845
684 846
694 print("new response") 856 print("new response")
695 857
696 new_y_tabular = read.delim("$type_cond.new_y_values_cond.new_response_file", header = $type_cond.new_y_values_cond.new_tabular_header, stringsAsFactors = FALSE) 858 new_y_tabular = read.delim("$type_cond.new_y_values_cond.new_response_file", header = $type_cond.new_y_values_cond.new_tabular_header, stringsAsFactors = FALSE)
697 new_y_input = new_y_tabular[,c($type_cond.new_y_values_cond.column_new_x, $type_cond.new_y_values_cond.column_new_y, $type_cond.new_y_values_cond.column_new_response)] 859 new_y_input = new_y_tabular[,c($type_cond.new_y_values_cond.column_new_x, $type_cond.new_y_values_cond.column_new_y, $type_cond.new_y_values_cond.column_new_response)]
698 colnames(new_y_input)[1:2] = c("x", "y") 860 colnames(new_y_input)[1:2] = c("x", "y")
861
699 ## merge with coordinate information of msidata 862 ## merge with coordinate information of msidata
700 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) 863 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata)))
701 colnames(msidata_coordinates)[3] = "pixel_index" 864 colnames(msidata_coordinates)[3] = "pixel_index"
702 merged_response = merge(msidata_coordinates, new_y_input, by=c("x", "y"), all.x=TRUE) 865 merged_response = as.data.frame(merge(msidata_coordinates, new_y_input, by=c("x", "y"), all.x=TRUE))
703 merged_response[is.na(merged_response)] = "NA" 866 merged_response[is.na(merged_response)] = "NA"
704 merged_response = merged_response[order(merged_response\$pixel_index),] 867 merged_response = merged_response[order(merged_response\$pixel_index),]
705 new_y_vector = as.factor(merged_response[,4]) 868 new_y_vector = as.factor(merged_response[,4])
706 prediction = predict(training_data,msidata, newy = new_y_vector) 869
707 870 prediction = predict(training_data, msidata, newy = new_y_vector)
871
708 ##numbers of levels for colour selection 872 ##numbers of levels for colour selection
709 number_levels = length(levels(new_y_vector)) 873 number_levels = length(levels(new_y_vector))
710 874
711 ## Summary table prediction 875 ##new summary table
712 summary_table = summary(prediction)\$accuracy[[names(prediction@resultData)]] 876
713 summary_table2 = round(as.numeric(summary_table), digits=2) 877 ##if SSC classification, summary table has more results:
714 summary_matrix = matrix(summary_table2, nrow=4, ncol=ncol(summary_table)) 878 #if str($type_cond.classification_type) == "SSC_classifier":
715 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table 879 print("SSC classification summary")
716 summary_table4 = t(summary_table3) 880
717 summary_table5 = cbind(c(names(prediction@resultData),colnames(summary_table)), summary_table4) 881 summary_df = as.data.frame(summary(prediction))
718 plot(0,type='n',axes=FALSE,ann=FALSE) 882 summary_df = round(summary_df, digits=3)
719 grid.table(summary_table5, rows= NULL) 883 colnames(summary_df) = c("Radius r", "Shrinkage s", "Features/Class", "Accuracy", "Sensitivity", "Specificity")
720 884 plot(0,type='n',axes=FALSE,ann=FALSE)
885 grid.table(summary_df, rows= NULL)
886
887 ## else PLS or OPLS classifier:
888 #else
889 print("PLS/OPLS classifier")
890 summary_df = as.data.frame(summary(prediction))
891 colnames(summary_df) = c("Component", "Accuracy", "Sensitivity", "Specificity")
892 summary_df = round(summary_df, digits = 2)
893 plot(0,type='n',axes=FALSE,ann=FALSE)
894 grid.table(summary_df, rows= NULL)
895
896 #end if
897
898
899 ##else for prediction without a new annotation (no calculation of accuracy):
721 #else 900 #else
722 prediction = predict(training_data,msidata) 901 prediction = predict(training_data, msidata)
723 number_levels = length(levels(training_data\$y[[1]])) 902 number_levels = length(levels(training_data@resultData@listData[[1]][["class"]]))
724 #end if 903 #end if
725 904
726 ## colours selection: 905 ## colours selection:
727 906
728 #if str($colour_conditional.colour_type) == "manual_colour" 907 #if str($colour_conditional.colour_type) == "manual_colour"
729 #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours]) 908 #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours])
730 colourvector = c($color_string) 909 colourvector = c($color_string)
731 910
732 #elif str($colour_conditional.colour_type) == "colourpalette" 911 #elif str($colour_conditional.colour_type) == "colourpalette"
733 colourvector = noquote($colour_conditional.palettes)(number_levels) 912 colourvector = noquote($colour_conditional.palettes)(number_levels)
734 913
735 #end if 914 #end if
736 915
737 ## m/z and pixel information output 916 ## m/z and pixel information output
738 predicted_classes = data.frame(prediction\$classes[[1]]) 917 predicted_classes = data.frame(prediction@resultData@listData[[1]][["class"]])
739 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) 918 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata)))
740 pixel_names = gsub(" = ", "y_", pixel_names) 919 colnames(msidata_coordinates)[3] = "pixel_index"
741 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] 920 x_coords = msidata_coordinates@listData[["x"]]
742 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] 921 y_coords = msidata_coordinates@listData[["y"]]
743 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes) 922 pixel_names = paste0("xy_", x_coords, "_", y_coords)
923 predicted_classes2 = data.frame(pixel_names, x_coords, y_coords, predicted_classes)
744 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") 924 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition")
745 predicted_toplabels = topFeatures(prediction, n=Inf) 925
746 if (colnames(predicted_toplabels)[4] == "coefficients"){ 926 ##topFeatures only available for SSC; for PLS and OPLS coefficients loading and weights are provided
747 predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5) 927
748 }else{ 928 #if str($type_cond.classification_type) == "SSC_classifier":
749 predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)} 929 predicted_toplabels = topFeatures(prediction, n=$type_cond.classification_type_cond.predicted_toplabels)
750 930 predicted_toplabels <- as.data.frame(predicted_toplabels)
931 predicted_toplabels[,6:7] <-round(predicted_toplabels[,6:7], digits = 5)
932 write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
933
934 #else
935 ## if PLS or OPLS classifier, coefficients, loadings, and weights instead of topFeatures
936 coefficients.df = as.data.frame(prediction@resultData@listData[[1]][["coefficients"]])
937 row_names <- prediction@featureData@mz
938 coefficients.df <- cbind("mz" = row_names, coefficients.df)
939 write.table(coefficients.df, file = "$coefficients", quote = FALSE, sep = "\t", row.names = FALSE)
940
941 ## add loadings and weights table
942 loadings.df = as.data.frame(prediction@resultData@listData[[1]][["loadings"]])
943 loadings.df <- cbind("mz" = row_names, loadings.df)
944 new_names <- paste0("loadings_", names(loadings.df)[-1])
945 names(loadings.df)[-1] <- new_names
946
947 weights.df = as.data.frame(prediction@resultData@listData[[1]][["weights"]])
948 weights.df <- cbind("mz" = row_names, weights.df)
949 new_names <- paste0("weights_", names(weights.df)[-1])
950 names(weights.df)[-1] <- new_names
951
952 ## combine loading and weights table
953 merged.load.wei = merge(loadings.df, weights.df, by = "mz")
954 write.table(merged.load.wei, file = "$loadings_weights", quote = FALSE, sep = "\t", row.names = FALSE)
955
956 #end if
957
751 ##predicted classes 958 ##predicted classes
752 prediction_df = cbind(coord(prediction)[,1:2], predicted_classes) 959 prediction_df = as.data.frame(cbind(coord(prediction)[,1:2], predicted_classes))
753 colnames(prediction_df) = c("x", "y", "predicted_classes") 960 colnames(prediction_df) = c("x", "y", "predicted_classes")
754 961
755 #if str($type_cond.classification_type) == "SSC_classifier": 962 #if str($type_cond.classification_type) == "SSC_classifier":
756 ## this seems to work only for SSC, therefore overwrite tables 963 ## this seems to work only for SSC, therefore overwrite tables
757 predicted_probabilities = data.frame(prediction\$probabilities[[1]]) 964 predicted_probabilities = data.frame(prediction@resultData@listData[[1]][["probability"]])
758 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes, predicted_probabilities) 965 predicted_classes2 = data.frame(pixel_names, x_coords, y_coords, predicted_classes, predicted_probabilities)
759 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition", levels(prediction\$classes[[1]])) 966 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition", levels(prediction@resultData@listData[[1]][["class"]]))
760 ## also image modes are specific to SSC 967 ## also image modes are specific to SSC
761 image(prediction, mode="classes", layout=c(1,1), main="Class", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector) 968 print(predicted_classes2[1:5,])
762 image(prediction, mode="probabilities", layout=c(1,1), main="Class probabilities",ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector) 969 print(image(prediction, values="class", layout=c(1,1), main="Class Prediction", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector))
763 970 print(image(prediction, values="probability", layout=c(1,1), main="Class Probabilities",ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector))
764 #else 971
765 972 #else
766 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ 973 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+
767 geom_tile()+ 974 geom_tile()+
768 coord_fixed()+ 975 coord_fixed()+
769 ggtitle("Predicted condition for each spectrum")+ 976 ggtitle("Predicted condition for each spectrum")+
770 theme_bw()+ 977 theme_bw()+
771 theme( 978 theme(
775 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 982 theme(text=element_text(family="ArialMT", face="bold", size=15))+
776 theme(legend.position="bottom", legend.direction="vertical")+ 983 theme(legend.position="bottom", legend.direction="vertical")+
777 guides(fill=guide_legend(ncol=4, byrow=TRUE))+ 984 guides(fill=guide_legend(ncol=4, byrow=TRUE))+
778 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) 985 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector)
779 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") 986 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass")
780 coord_labels\$file_number = gsub( "_.*§", "", coord_labels\$predicted_classes) 987 ##coord_labels\$file_number = gsub( "_.*§", "", coord_labels\$predicted_classes)
781 print(prediction_plot) 988 print(prediction_plot)
782 #end if 989 #end if
783 990
784 write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
785 write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
786
787
788
789
790 #if str($type_cond.new_y_values_cond.new_y_values) == "new_response": 991 #if str($type_cond.new_y_values_cond.new_y_values) == "new_response":
791 ## image with right and wrong classes: 992 ## image with right and wrong classes:
792 993 comparison_df = as.data.frame(cbind(prediction_df, new_y_vector))
793 comparison_df = cbind(prediction_df, new_y_vector) 994 colnames(comparison_df) = c("x", "y", "predicted_class", "annotated_class")
794 comparison_df\$correct<- ifelse(comparison_df\$predicted_classes==comparison_df\$new_y_vector, T, F) 995 comparison_df\$predicted_class = ifelse(is.na(comparison_df\$predicted_class), "NA", as.character(comparison_df\$predicted_class))
795 correctness = round(sum(comparison_df\$correct)/length(comparison_df\$correct)*100,2) 996 comparison_df\$correct <- ifelse(comparison_df\$predicted_class==comparison_df\$annotated_class, T, F)
796 997 correctness = round(sum(comparison_df\$correct, na.rm = TRUE)/length(comparison_df\$correct)*100,2)
797 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+ 998
999 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+
798 geom_tile()+ 1000 geom_tile()+
799 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ 1001 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+
800 coord_fixed()+ 1002 coord_fixed()+
801 ggtitle(paste0("Correctness of classification: ",correctness, "%"))+ 1003 ggtitle(paste0("Correctness of classification: ", correctness, " %"))+
802 theme_bw()+ 1004 theme_bw()+
803 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 1005 theme(text=element_text(family="ArialMT", face="bold", size=15))+
804 theme(legend.position="bottom",legend.direction="vertical")+ 1006 theme(legend.position="bottom",legend.direction="vertical")+
805 guides(fill=guide_legend(ncol=2,byrow=TRUE)) 1007 guides(fill=guide_legend(ncol=2,byrow=TRUE))
806 print(correctness_plot) 1008 print(correctness_plot)
807 #end if 1009 #end if
1010
1011 ## pixel output
1012 #if str($type_cond.new_y_values_cond.new_y_values) == "new_response":
1013 print("new response output")
1014 write.table(comparison_df, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
1015
1016 #else
1017 write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
1018
1019 #end if
808 1020
809 ## optional output as .RData 1021 ## optional output as .RData
810 #if $output_rdata: 1022 #if $output_rdata:
811 msidata = prediction 1023 msidata = prediction
812 save(msidata, file="$classification_rdata") 1024 save(msidata, file="$classification_rdata")
815 #end if 1027 #end if
816 1028
817 dev.off() 1029 dev.off()
818 1030
819 }else{ 1031 }else{
1032 plot.new()
1033 text(0.5, 0.5, "Inputfile has no intensities > 0 \n or contains NA values.", cex = 1.5)
820 print("Inputfile has no intensities > 0 or contains NA values") 1034 print("Inputfile has no intensities > 0 or contains NA values")
821 dev.off() 1035 dev.off()
822 } 1036 }
823 1037
824 1038
862 </when> 1076 </when>
863 <when value="PLS_analysis"> 1077 <when value="PLS_analysis">
864 <param name="pls_comp" type="integer" value="5" 1078 <param name="pls_comp" type="integer" value="5"
865 label="The optimal number of PLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of PLS-DA components"/> 1079 label="The optimal number of PLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of PLS-DA components"/>
866 <param name="pls_scale" type="boolean" label="Data scaling" truevalue="TRUE" falsevalue="FALSE"/> 1080 <param name="pls_scale" type="boolean" label="Data scaling" truevalue="TRUE" falsevalue="FALSE"/>
867 <param name="pls_toplabels" type="integer" value="100" 1081 <param name="PLS_Yweights" type="boolean" label="Y weights" help="Y weights represent the coefficients associated with the response variables and are used to model the relationship between predictors and responses in the context of classification. They represent the importance of each response variable in predicting each component. They can be useful if you have multiple response variables."/>
868 label="Number of toplabels (m/z features) which should be written in tabular output"/> 1082 <!--param name="pls_toplabels" type="integer" value="100"
1083 label="Number of toplabels (m/z features) which should be written in tabular output"/-->
869 </when> 1084 </when>
870 </conditional> 1085 </conditional>
871 </when> 1086 </when>
872 1087
873 <when value="OPLS"> 1088 <when value="OPLS">
889 <when value="opls_analysis"> 1104 <when value="opls_analysis">
890 <param name="opls_comp" type="integer" value="5" 1105 <param name="opls_comp" type="integer" value="5"
891 label="The optimal number of OPLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of OPLS-DA components"/> 1106 label="The optimal number of OPLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of OPLS-DA components"/>
892 <!--param name="xnew" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/--> 1107 <!--param name="xnew" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/-->
893 <param name="opls_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Data scaling"/> 1108 <param name="opls_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Data scaling"/>
1109 <param name="OPLS_Yweights" type="boolean" label="Y weights" help="Y weights represent the coefficients associated with the response variables and are used to model the relationship between predictors and responses in the context of classification. They represent the importance of each response variable in predicting each component. They can be useful if you have multiple response variables."/>
894 <!--param name="opls_toplabels" type="integer" value="100" 1110 <!--param name="opls_toplabels" type="integer" value="100"
895 label="Number of toplabels (m/z features) which should be written in tabular output"/--> 1111 label="Number of toplabels (m/z features) which should be written in tabular output"/-->
896 </when> 1112 </when>
897 </conditional> 1113 </conditional>
898 </when> 1114 </when>
903 <option value="ssc_cvapply" selected="True">cvApply</option> 1119 <option value="ssc_cvapply" selected="True">cvApply</option>
904 <option value="ssc_analysis">spatial shrunken centroids analysis</option> 1120 <option value="ssc_analysis">spatial shrunken centroids analysis</option>
905 </param> 1121 </param>
906 <when value="ssc_cvapply"> 1122 <when value="ssc_cvapply">
907 <param name="write_best_params" type="boolean" label="Write out best r and s values" help="Can be used to generate automatic classification workflow"/> 1123 <param name="write_best_params" type="boolean" label="Write out best r and s values" help="Can be used to generate automatic classification workflow"/>
1124 <param name="ssc_cv_accuracy_plot" type="boolean" label="Plot CV accuracy plots on one page (=Yes) or individual pages (=No)"/>
908 </when> 1125 </when>
909 <when value="ssc_analysis"> 1126 <when value="ssc_analysis">
910 <!--param name="ssc_toplabels" type="integer" value="100" 1127 <param name="ssc_toplabels" type="integer" value="100"
911 label="Number of toplabels (m/z features) which should be written in tabular output"/--> 1128 label="Number of toplabels (m/z features) which should be written in tabular output"/>
912 </when> 1129 </when>
913 </conditional> 1130 </conditional>
914 <param name="ssc_r" type="text" value="2" 1131 <param name="ssc_r" type="text" value="2"
915 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 0,1,2,3 or 2:5)"> 1132 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 0,1,2,3 or 2:5)">
916 <expand macro="sanitizer_multiple_digits"/> 1133 <expand macro="sanitizer_multiple_digits"/>
920 <expand macro="sanitizer_multiple_digits"/> 1137 <expand macro="sanitizer_multiple_digits"/>
921 </param> 1138 </param>
922 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> 1139 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights">
923 <option value="gaussian">gaussian</option> 1140 <option value="gaussian">gaussian</option>
924 <option value="adaptive" selected="True">adaptive</option> 1141 <option value="adaptive" selected="True">adaptive</option>
925 </param> 1142 </param>
926 </when> 1143 </when>
927 </conditional> 1144 </conditional>
928 1145
929 </when> 1146 </when>
930 1147
931 <when value="prediction"> 1148 <when value="prediction">
932 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> 1149 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/>
933 <!--param name="predicted_toplabels" type="integer" value="100" 1150 <conditional name="classification_type_cond">
934 label="Number of toplabels (m/z features) which should be written in tabular output"/--> 1151 <param name="classification_type" type="select" label="Which classification method was used">
935 <param name="classification_type" type="select" display="radio" optional="False" label="Which classification method was used"> 1152 <option value="PLS_classifier" selected="True" >PLS classifier</option>
936 <option value="PLS_classifier" selected="True" >PLS classifier</option> 1153 <option value="OPLS_classifier">OPLS classifier</option>
937 <option value="OPLS_classifier">OPLS classifier</option> 1154 <option value="SSC_classifier">SSC classifier</option>
938 <option value="SSC_classifier">SSC_classifier</option> 1155 </param>
939 </param> 1156 <when value="PLS_classifier"/>
1157 <when value="OPLS_classifier"/>
1158 <when value="SSC_classifier">
1159 <param name="predicted_toplabels" type="integer" value="100"
1160 label="Number of toplabels (m/z features) which should be written in tabular output"/>
1161 </when>
1162 </conditional>
940 <conditional name="new_y_values_cond"> 1163 <conditional name="new_y_values_cond">
941 <param name="new_y_values" type="select" label="Load annotations (optional, but allows accuracy calculations)"> 1164 <param name="new_y_values" type="select" label="Load annotations (optional, but allows accuracy calculations)">
942 <option value="no_new_response" selected="True">no</option> 1165 <option value="no_new_response" selected="True">no</option>
943 <option value="new_response">use annotations</option> 1166 <option value="new_response">use annotations</option>
944 </param> 1167 </param>
982 </conditional> 1205 </conditional>
983 <param name="output_rdata" type="boolean" label="Results as .RData output" help="Can be used to generate a classification prediction on new data"/> 1206 <param name="output_rdata" type="boolean" label="Results as .RData output" help="Can be used to generate a classification prediction on new data"/>
984 </inputs> 1207 </inputs>
985 <outputs> 1208 <outputs>
986 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}: results"/> 1209 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}: results"/>
987 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/> 1210 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features">
988 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/> 1211 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_analysis' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'SSC_classifier'</filter>
1212 </data>
1213 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels">
1214 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'PLS' and type_cond['method_cond']['analysis_cond']['PLS_method'] == 'PLS_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'OPLS' and type_cond['method_cond']['opls_analysis_cond']['opls_method'] == 'opls_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_analysis' or type_cond['type_method'] == 'prediction'</filter>
1215 </data>
1216 <data format="tabular" name="coefficients" label="${tool.name} on ${on_string}: coefficients">
1217 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'PLS' and type_cond['method_cond']['analysis_cond']['PLS_method'] == 'PLS_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'OPLS' and type_cond['method_cond']['opls_analysis_cond']['opls_method'] == 'opls_analysis' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'PLS_classifier' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'OPLS_classifier'</filter>
1218 </data>
1219 <data format="tabular" name="loadings_weights" label="${tool.name} on ${on_string}: loadings and weights">
1220 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'PLS' and type_cond['method_cond']['analysis_cond']['PLS_method'] == 'PLS_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'OPLS' and type_cond['method_cond']['opls_analysis_cond']['opls_method'] == 'opls_analysis' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'PLS_classifier' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'OPLS_classifier'</filter>
1221 </data>
989 <data format="txt" name="best_r" label="${tool.name} on ${on_string}:best r"> 1222 <data format="txt" name="best_r" label="${tool.name} on ${on_string}:best r">
990 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter> 1223 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter>
991 </data> 1224 </data>
992 <data format="txt" name="best_s" label="${tool.name} on ${on_string}:best s"> 1225 <data format="txt" name="best_s" label="${tool.name} on ${on_string}:best s">
993 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter> 1226 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter>
994 </data> 1227 </data>
995 <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData"> 1228 <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData">
996 <filter>output_rdata</filter> 1229 <filter>output_rdata</filter>
997 </data> 1230 </data>
998 </outputs> 1231 </outputs>
999 <tests> 1232 <tests>
1000 <test expect_num_outputs="3"> 1233 <test expect_num_outputs="1">
1001 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> 1234 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
1002 <conditional name="type_cond"> 1235 <conditional name="type_cond">
1003 <param name="type_method" value="training"/> 1236 <param name="type_method" value="training"/>
1004 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/> 1237 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/>
1005 <param name="column_x" value="1"/> 1238 <param name="column_x" value="1"/>
1013 <param name="PLS_method" value="cvapply"/> 1246 <param name="PLS_method" value="cvapply"/>
1014 <param name="plscv_comp" value="2:4"/> 1247 <param name="plscv_comp" value="2:4"/>
1015 </conditional> 1248 </conditional>
1016 </conditional> 1249 </conditional>
1017 </conditional> 1250 </conditional>
1018 <output name="mzfeatures" file="features_test1.tabular"/>
1019 <output name="pixeloutput" file="pixels_test1.tabular"/>
1020 <output name="classification_images" file="test1.pdf" compare="sim_size" delta="2000"/> 1251 <output name="classification_images" file="test1.pdf" compare="sim_size" delta="2000"/>
1021 </test> 1252 </test>
1022 1253
1023 <test expect_num_outputs="4"> 1254 <test expect_num_outputs="5">
1024 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> 1255 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
1025 <conditional name="type_cond"> 1256 <conditional name="type_cond">
1026 <param name="type_method" value="training"/> 1257 <param name="type_method" value="training"/>
1027 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/> 1258 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/>
1028 <param name="column_x" value="1"/> 1259 <param name="column_x" value="1"/>
1033 <param name="class_method" value="PLS"/> 1264 <param name="class_method" value="PLS"/>
1034 <conditional name="analysis_cond"> 1265 <conditional name="analysis_cond">
1035 <param name="PLS_method" value="PLS_analysis"/> 1266 <param name="PLS_method" value="PLS_analysis"/>
1036 <param name="pls_comp" value="2"/> 1267 <param name="pls_comp" value="2"/>
1037 <param name="pls_scale" value="TRUE"/> 1268 <param name="pls_scale" value="TRUE"/>
1269 <param name="PLS_Yweights" value="TRUE"/>
1038 <!--param name="pls_toplabels" value="100"/--> 1270 <!--param name="pls_toplabels" value="100"/-->
1039 </conditional> 1271 </conditional>
1040 </conditional> 1272 </conditional>
1041 </conditional> 1273 </conditional>
1042 <param name="output_rdata" value="True"/> 1274 <param name="output_rdata" value="True"/>
1043 <output name="mzfeatures" file="features_test2.tabular"/> 1275 <output name="coefficients">
1276 <assert_contents>
1277 <has_text text="900.004699707031"/>
1278 <has_text text="962.870727539062"/>
1279 <has_text text="999.606872558594"/>
1280 </assert_contents>
1281 </output>
1282 <output name="loadings_weights">
1283 <assert_contents>
1284 <has_text text="900.076354980469"/>
1285 <has_text text="950.495910644531"/>
1286 <has_text text="989.024536132812"/>
1287 </assert_contents>
1288 </output>
1044 <output name="pixeloutput" file="pixels_test2.tabular"/> 1289 <output name="pixeloutput" file="pixels_test2.tabular"/>
1045 <output name="classification_images" file="test2.pdf" compare="sim_size"/> 1290 <output name="classification_images" file="test2.pdf" compare="sim_size"/>
1046 <output name="classification_rdata" file="test2.rdata" compare="sim_size"/> 1291 <output name="classification_rdata" file="test2.rdata" compare="sim_size"/>
1047 </test> 1292 </test>
1048 1293
1049 <test expect_num_outputs="3"> 1294 <test expect_num_outputs="1">
1050 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> 1295 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
1051 <conditional name="type_cond"> 1296 <conditional name="type_cond">
1052 <param name="type_method" value="training"/> 1297 <param name="type_method" value="training"/>
1053 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/> 1298 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/>
1054 <param name="column_x" value="1"/> 1299 <param name="column_x" value="1"/>
1059 <conditional name="method_cond"> 1304 <conditional name="method_cond">
1060 <param name="class_method" value="OPLS"/> 1305 <param name="class_method" value="OPLS"/>
1061 <conditional name="opls_analysis_cond"> 1306 <conditional name="opls_analysis_cond">
1062 <param name="opls_method" value="opls_cvapply"/> 1307 <param name="opls_method" value="opls_cvapply"/>
1063 <param name="opls_cvcomp" value="1:2"/> 1308 <param name="opls_cvcomp" value="1:2"/>
1064 <param name="xnew_cv" value="FALSE"/>
1065 </conditional> 1309 </conditional>
1066 </conditional> 1310 </conditional>
1067 </conditional> 1311 </conditional>
1068 <output name="mzfeatures" file="features_test3.tabular"/>
1069 <output name="pixeloutput" file="pixels_test3.tabular"/>
1070 <output name="classification_images" file="test3.pdf" compare="sim_size"/> 1312 <output name="classification_images" file="test3.pdf" compare="sim_size"/>
1071 </test> 1313 </test>
1072 1314
1073 <test expect_num_outputs="4"> 1315 <test expect_num_outputs="5">
1074 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> 1316 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
1075 <conditional name="type_cond"> 1317 <conditional name="type_cond">
1076 <param name="type_method" value="training"/> 1318 <param name="type_method" value="training"/>
1077 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/> 1319 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/>
1078 <param name="column_x" value="1"/> 1320 <param name="column_x" value="1"/>
1082 <conditional name="method_cond"> 1324 <conditional name="method_cond">
1083 <param name="class_method" value="OPLS"/> 1325 <param name="class_method" value="OPLS"/>
1084 <conditional name="opls_analysis_cond"> 1326 <conditional name="opls_analysis_cond">
1085 <param name="opls_method" value="opls_analysis"/> 1327 <param name="opls_method" value="opls_analysis"/>
1086 <param name="opls_comp" value="3"/> 1328 <param name="opls_comp" value="3"/>
1087 <param name="xnew" value="FALSE"/>
1088 <param name="opls_scale" value="FALSE"/> 1329 <param name="opls_scale" value="FALSE"/>
1089 <!--param name="opls_toplabels" value="100"/--> 1330 <param name="PLS_Yweights" value="FALSE"/>
1090 </conditional> 1331 </conditional>
1091 </conditional> 1332 </conditional>
1092 </conditional> 1333 </conditional>
1093 <param name="output_rdata" value="True"/> 1334 <param name="output_rdata" value="True"/>
1094 <output name="mzfeatures" file="features_test4.tabular"/>
1095 <output name="pixeloutput" file="pixels_test4.tabular"/> 1335 <output name="pixeloutput" file="pixels_test4.tabular"/>
1336 <output name="coefficients">
1337 <assert_contents>
1338 <has_text text="900.148010253906"/>
1339 <has_text text="974.132446289062"/>
1340 <has_text text="999.908935546875"/>
1341 </assert_contents>
1342 </output>
1343 <output name="loadings_weights">
1344 <assert_contents>
1345 <has_text text="901.581848144531"/>
1346 <has_text text="939.189086914062"/>
1347 <has_text text="984.185363769531"/>
1348 </assert_contents>
1349 </output>
1096 <output name="classification_images" file="test4.pdf" compare="sim_size"/> 1350 <output name="classification_images" file="test4.pdf" compare="sim_size"/>
1097 <output name="classification_rdata" file="test4.rdata" compare="sim_size"/> 1351 <output name="classification_rdata" file="test4.rdata" compare="sim_size"/>
1098 </test> 1352 </test>
1099 1353
1100 <test expect_num_outputs="3"> 1354 <test expect_num_outputs="3">
1112 <conditional name="ssc_analysis_cond"> 1366 <conditional name="ssc_analysis_cond">
1113 <param name="ssc_method" value="ssc_cvapply"/> 1367 <param name="ssc_method" value="ssc_cvapply"/>
1114 <param name="ssc_r" value="1:2"/> 1368 <param name="ssc_r" value="1:2"/>
1115 <param name="ssc_s" value="2:3"/> 1369 <param name="ssc_s" value="2:3"/>
1116 <param name="ssc_kernel_method" value="adaptive"/> 1370 <param name="ssc_kernel_method" value="adaptive"/>
1371 <param name="write_best_params" value="TRUE"/>
1117 </conditional> 1372 </conditional>
1118 </conditional> 1373 </conditional>
1119 </conditional> 1374 </conditional>
1120 <output name="mzfeatures" file="features_test5.tabular"/>
1121 <output name="pixeloutput" file="pixels_test5.tabular"/>
1122 <output name="classification_images" file="test5.pdf" compare="sim_size"/> 1375 <output name="classification_images" file="test5.pdf" compare="sim_size"/>
1376 <output name="best_r" file="best_r_test5.txt"/>
1377 <output name="best_s" file="best_s_test5.txt"/>
1123 </test> 1378 </test>
1124 1379
1125 <test expect_num_outputs="4"> 1380 <test expect_num_outputs="4">
1126 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> 1381 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
1127 <conditional name="type_cond"> 1382 <conditional name="type_cond">
1132 <param name="column_response" value="4"/> 1387 <param name="column_response" value="4"/>
1133 <conditional name="method_cond"> 1388 <conditional name="method_cond">
1134 <param name="class_method" value="spatialShrunkenCentroids"/> 1389 <param name="class_method" value="spatialShrunkenCentroids"/>
1135 <conditional name="ssc_analysis_cond"> 1390 <conditional name="ssc_analysis_cond">
1136 <param name="ssc_method" value="ssc_analysis"/> 1391 <param name="ssc_method" value="ssc_analysis"/>
1137 <!--param name="ssc_toplabels" value="20"/--> 1392 <param name="ssc_toplabels" value="20"/>
1138 </conditional> 1393 </conditional>
1139 <param name="ssc_r" value="2"/> 1394 <param name="ssc_r" value="2"/>
1140 <param name="ssc_s" value="2"/> 1395 <param name="ssc_s" value="2"/>
1141 <param name="ssc_kernel_method" value="adaptive"/> 1396 <param name="ssc_kernel_method" value="adaptive"/>
1142 </conditional> 1397 </conditional>
1146 <output name="pixeloutput" file="pixels_test6.tabular"/> 1401 <output name="pixeloutput" file="pixels_test6.tabular"/>
1147 <output name="classification_images" file="test6.pdf" compare="sim_size"/> 1402 <output name="classification_images" file="test6.pdf" compare="sim_size"/>
1148 <output name="classification_rdata" file="test6.rdata" compare="sim_size" delta="15000"/> 1403 <output name="classification_rdata" file="test6.rdata" compare="sim_size" delta="15000"/>
1149 </test> 1404 </test>
1150 1405
1151 <test expect_num_outputs="4"> 1406 <test expect_num_outputs="5">
1152 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> 1407 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
1153 <conditional name="type_cond"> 1408 <conditional name="type_cond">
1154 <param name="type_method" value="prediction"/> 1409 <param name="type_method" value="prediction"/>
1410 <param name="type_method" value="prediction"/>
1155 <param name="training_result" value="test2.rdata" ftype="rdata"/> 1411 <param name="training_result" value="test2.rdata" ftype="rdata"/>
1412 <param name="classification_type" value="PLS_classifier"/>
1156 <conditional name="new_y_values_cond"> 1413 <conditional name="new_y_values_cond">
1157 <param name="new_y_values" value="new_response"/> 1414 <param name="new_y_values" value="new_response"/>
1158 <param name="new_response_file" value="pixel_annotation_file1.tabular" ftype="tabular"/> 1415 <param name="new_response_file" value="pixel_annotation_file1.tabular" ftype="tabular"/>
1159 <param name="column_new_x" value="1"/> 1416 <param name="column_new_x" value="1"/>
1160 <param name="column_new_y" value="2"/> 1417 <param name="column_new_y" value="2"/>
1161 <param name="column_new_response" value="4"/> 1418 <param name="column_new_response" value="4"/>
1162 <param name="new_tabular_header" value="False"/> 1419 <param name="new_tabular_header" value="False"/>
1163 </conditional> 1420 </conditional>
1164 </conditional> 1421 </conditional>
1165 <param name="output_rdata" value="True"/> 1422 <param name="output_rdata" value="True"/>
1166 <output name="mzfeatures" file="features_test7.tabular"/> 1423 <output name="coefficients" file="coefficients_test7.tabular"/>
1424 <output name="loadings_weights" file="loadings_and_weights_test7.tabular"/>
1167 <output name="pixeloutput" file="pixels_test7.tabular"/> 1425 <output name="pixeloutput" file="pixels_test7.tabular"/>
1168 <output name="classification_images" file="test7.pdf" compare="sim_size"/> 1426 <output name="classification_images" file="test7.pdf" compare="sim_size"/>
1169 <output name="classification_rdata" file="test7.rdata" compare="sim_size" /> 1427 <output name="classification_rdata" file="test7.rdata" compare="sim_size" />
1170 </test> 1428 </test>
1171 </tests> 1429 </tests>
1221 1479
1222 1480
1223 **Tips** 1481 **Tips**
1224 1482
1225 - The classification function will only run on files with valid intensity values (NA values are not allowed) 1483 - The classification function will only run on files with valid intensity values (NA values are not allowed)
1226 - Only a single input file is accepted; several files have to be combined beforehand, for example with the MSI combine tool. 1484 - Only a single input file is accepted; several files have to be combined beforehand, for example with the MSI combine tool.
1227 1485
1228 1486
1229 **Output** 1487 **Output**
1230 1488
1231 - PDF with the heatmaps and plots for the classification 1489 - PDF with the heatmaps and plots for the classification