comparison classification.xml @ 14:ece627528a78 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 888b3e991d0752b694bf480531ce0e5318c2f337-dirty"
author galaxyp
date Fri, 07 May 2021 10:10:35 +0000
parents 24c000517173
children f28ad96b76dc
comparison
equal deleted inserted replaced
13:b18329a8ac14 14:ece627528a78
1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.0"> 1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.1">
2 <description>spatial classification of mass spectrometry imaging data</description> 2 <description>spatial classification of mass spectrometry imaging data</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"> 6 <expand macro="requirements">
23 ################################# load libraries and read file ######################### 23 ################################# load libraries and read file #########################
24 24
25 library(Cardinal) 25 library(Cardinal)
26 library(gridExtra) 26 library(gridExtra)
27 library(ggplot2) 27 library(ggplot2)
28 28 library(scales)
29 29
30 @READING_MSIDATA@ 30 @READING_MSIDATA@
31 31
32 msidata = as(msidata, "MSImageSet") ##coercion to MSImageSet 32 msidata = as(msidata, "MSImageSet") ##coercion to MSImageSet
33 33
55 55
56 title(main=paste0(Title," for file: \n\n", "$infile.display_name")) 56 title(main=paste0(Title," for file: \n\n", "$infile.display_name"))
57 57
58 58
59 ##################### I) numbers and control plots ############################# 59 ##################### I) numbers and control plots #############################
60 ############################################################################### 60 ################################################################################
61 61
62 ## table with values 62 ## table with values
63 grid.table(property_df, rows= NULL) 63 grid.table(property_df, rows= NULL)
64 64
65 65
66 if (npeaks > 0 && sum(is.na(spectra(msidata)))==0){ 66 if (npeaks > 0 && sum(is.na(spectra(msidata)))==0){
67 67
68 opar <- par() 68 opar <- par()
69 69
70 ######################## II) Training ############################# 70 ######################## II) Training #######################################
71 ############################################################################# 71 #############################################################################
72 #if str( $type_cond.type_method) == "training": 72 #if str( $type_cond.type_method) == "training":
73 print("training") 73 print("training")
74 74
75 75
88 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) 88 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata)))
89 colnames(msidata_coordinates)[3] = "pixel_index" 89 colnames(msidata_coordinates)[3] = "pixel_index"
90 merged_response = merge(msidata_coordinates, y_input, by=c("x", "y"), all.x=TRUE) 90 merged_response = merge(msidata_coordinates, y_input, by=c("x", "y"), all.x=TRUE)
91 merged_response[is.na(merged_response)] = "NA" 91 merged_response[is.na(merged_response)] = "NA"
92 merged_response = merged_response[order(merged_response\$pixel_index),] 92 merged_response = merged_response[order(merged_response\$pixel_index),]
93 y_vector = as.factor(merged_response[,4]) 93 conditions = as.factor(merged_response[,4])
94 y_vector = conditions
94 95
95 ## plot of y vector 96 ## plot of y vector
96 97
97 position_df = cbind(coord(msidata)[,1:2], y_vector) 98 position_df = cbind(coord(msidata)[,1:2], conditions)
98 y_plot = ggplot(position_df, aes(x=x, y=y, fill=y_vector))+ 99 y_plot = ggplot(position_df, aes(x=x, y=y, fill=conditions))+
99 geom_tile() + 100 geom_tile() +
100 coord_fixed()+ 101 coord_fixed()+
101 ggtitle("Distribution of the response variable y")+ 102 ggtitle("Distribution of the conditions")+
102 theme_bw()+ 103 theme_bw()+
104 theme(
105 plot.background = element_blank(),
106 panel.grid.major = element_blank(),
107 panel.grid.minor = element_blank())+
103 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 108 theme(text=element_text(family="ArialMT", face="bold", size=15))+
104 theme(legend.position="bottom",legend.direction="vertical")+ 109 theme(legend.position="bottom",legend.direction="vertical")+
105 guides(fill=guide_legend(ncol=4,byrow=TRUE)) 110 guides(fill=guide_legend(ncol=4,byrow=TRUE))
106 coord_labels = aggregate(cbind(x,y)~y_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") 111 coord_labels = aggregate(cbind(x,y)~conditions, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
107 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$y_vector) 112 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$conditions)
108 print(y_plot) 113 print(y_plot)
109 114
110 115
111 ## plot of folds 116 ## plot of folds
112 117
117 position_df = cbind(coord(msidata)[,1:2], fold_vector) 122 position_df = cbind(coord(msidata)[,1:2], fold_vector)
118 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ 123 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+
119 geom_tile() + 124 geom_tile() +
120 coord_fixed()+ 125 coord_fixed()+
121 ggtitle("Distribution of the fold variable")+ 126 ggtitle("Distribution of the fold variable")+
122 theme_bw()+ 127 theme_bw()+
128 theme(
129 plot.background = element_blank(),
130 panel.grid.major = element_blank(),
131 panel.grid.minor = element_blank())+
123 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 132 theme(text=element_text(family="ArialMT", face="bold", size=15))+
124 theme(legend.position="bottom",legend.direction="vertical")+ 133 theme(legend.position="bottom",legend.direction="vertical")+
125 guides(fill=guide_legend(ncol=4,byrow=TRUE)) 134 guides(fill=guide_legend(ncol=4,byrow=TRUE))
126 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") 135 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
127 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) 136 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector)
274 283
275 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ 284 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+
276 geom_tile() + 285 geom_tile() +
277 coord_fixed()+ 286 coord_fixed()+
278 ggtitle("Predicted condition for each pixel")+ 287 ggtitle("Predicted condition for each pixel")+
279 theme_bw()+ 288 theme_bw()+
289 theme(
290 plot.background = element_blank(),
291 panel.grid.major = element_blank(),
292 panel.grid.minor = element_blank())+
280 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 293 theme(text=element_text(family="ArialMT", face="bold", size=15))+
281 theme(legend.position="bottom",legend.direction="vertical")+ 294 theme(legend.position="bottom",legend.direction="vertical")+
282 guides(fill=guide_legend(ncol=4,byrow=TRUE)) 295 guides(fill=guide_legend(ncol=4,byrow=TRUE))
283 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") 296 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass")
284 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) 297 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
441 454
442 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ 455 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+
443 geom_tile() + 456 geom_tile() +
444 coord_fixed()+ 457 coord_fixed()+
445 ggtitle("Predicted condition for each pixel")+ 458 ggtitle("Predicted condition for each pixel")+
446 theme_bw()+ 459 theme_bw()+
460 theme(
461 plot.background = element_blank(),
462 panel.grid.major = element_blank(),
463 panel.grid.minor = element_blank())+
447 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 464 theme(text=element_text(family="ArialMT", face="bold", size=15))+
448 theme(legend.position="bottom",legend.direction="vertical")+ 465 theme(legend.position="bottom",legend.direction="vertical")+
449 guides(fill=guide_legend(ncol=4,byrow=TRUE)) 466 guides(fill=guide_legend(ncol=4,byrow=TRUE))
450 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") 467 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass")
451 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) 468 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
505 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space 522 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space
506 minimumy = min(coord(msidata.cv.ssc)[,2]) 523 minimumy = min(coord(msidata.cv.ssc)[,2])
507 maximumy = max(coord(msidata.cv.ssc)[,2]) 524 maximumy = max(coord(msidata.cv.ssc)[,2])
508 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(1,1)) 525 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(1,1))
509 526
527 #if $type_cond.method_cond.ssc_analysis_cond.write_best_params:
528 write.table(r_value, file="$best_r", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
529 write.table(s_value, file="$best_s", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
530 #end if
531
510 ## print table with summary in pdf 532 ## print table with summary in pdf
511 par(opar) 533 par(opar)
512 plot(0,type='n',axes=FALSE,ann=FALSE) 534 plot(0,type='n',axes=FALSE,ann=FALSE)
513 title(main="Summary for the different parameters\n", adj=0.5) 535 title(main="Summary for the different parameters\n", adj=0.5)
514 ## 20 rows fits in one page: 536 ## 20 rows fits in one page:
540 562
541 ## set variables for components and number of response groups 563 ## set variables for components and number of response groups
542 number_groups = length(levels(y_vector)) 564 number_groups = length(levels(y_vector))
543 565
544 ## SSC analysis and plot 566 ## SSC analysis and plot
545 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, .fold = fold_vector, 567 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector,
546 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") 568 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method")
547 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s))) 569 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s)),
570 col=hue_pal()(length(levels(msidata.ssc\$classes[[1]]))), lwd=2)
571
572
548 573
549 ### summary table SSC 574 ### summary table SSC
550 ##############summary_table = summary(msidata.ssc) 575 ##############summary_table = summary(msidata.ssc)
551 576
552 ### stop if multiple values for r and s were used as input 577 ### stop if multiple values for r and s were used as input
580 maximumy = max(coord(msidata)[,2]) 605 maximumy = max(coord(msidata)[,2])
581 print(image(msidata, mz = topFeatures(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) 606 print(image(msidata, mz = topFeatures(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap"))
582 607
583 ## m/z and pixel information output 608 ## m/z and pixel information output
584 ssc_classes = data.frame(msidata.ssc\$classes[[1]]) 609 ssc_classes = data.frame(msidata.ssc\$classes[[1]])
610 ssc_probabilities = data.frame(msidata.ssc\$probabilities[[1]])
585 611
586 ## pixel names and coordinates 612 ## pixel names and coordinates
587 ## to remove potential sample names and z dimension, split at comma and take only x and y 613 ## to remove potential sample names and z dimension, split at comma and take only x and y
588 x_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 1)) 614 x_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 1))
589 y_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 2)) 615 y_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 2))
594 620
595 ## remove msidata to clean up RAM space 621 ## remove msidata to clean up RAM space
596 rm(msidata) 622 rm(msidata)
597 gc() 623 gc()
598 624
599 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes) 625 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes, ssc_probabilities)
600 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition") 626 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition", levels(msidata.ssc\$classes[[1]]))
601 ssc_toplabels = topFeatures(msidata.ssc, n=Inf) 627 ssc_toplabels = topFeatures(msidata.ssc, n=Inf)
602 ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6) 628 ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6)
603 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 629 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
604 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 630 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
605 631
606 ## image with predicted classes 632 image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)),
607 prediction_df = cbind(coord(msidata.ssc)[,1:2], ssc_classes) 633 col=hue_pal()(length(levels(msidata.ssc\$classes[[1]]))), mode="classes", layout=c(1,1), main="Class Prediction")
608 colnames(prediction_df) = c("x", "y", "predicted_classes") 634 image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)),
609 635 col=hue_pal()(length(levels(msidata.ssc\$classes[[1]]))), mode="probabilities", layout=c(1,1), main="Class probabilities")
610 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ 636
611 geom_tile() +
612 coord_fixed()+
613 ggtitle("Predicted condition for each pixel")+
614 theme_bw()+
615 theme(text=element_text(family="ArialMT", face="bold", size=15))+
616 theme(legend.position="bottom",legend.direction="vertical")+
617 guides(fill=guide_legend(ncol=4,byrow=TRUE))
618 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass")
619 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
620 print(prediction_plot)
621
622 637
623 ## image with right and wrong classes: 638 ## image with right and wrong classes:
639 prediction_df = cbind(coord(msidata.ssc)[,1:2], ssc_classes)
640 colnames(prediction_df) = c("x", "y", "predicted_classes")
624 comparison_df = cbind(prediction_df, y_vector) 641 comparison_df = cbind(prediction_df, y_vector)
625 comparison_df\$correct<- ifelse(comparison_df\$predicted_classes==comparison_df\$y_vector, T, F) 642 comparison_df\$correct<- ifelse(comparison_df\$predicted_classes==comparison_df\$y_vector, T, F)
643 correctness = round(sum(comparison_df\$correct)/length(comparison_df\$correct)*100,2)
626 644
627 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+ 645 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+
628 geom_tile() + 646 geom_tile() +
629 coord_fixed()+ 647 coord_fixed()+
630 ggtitle("Correctness of classification")+ 648 ggtitle(paste0("Correctness of classification: ",correctness, "%"))+
631 theme_bw()+ 649 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+
650 theme_bw()+
651 theme(
652 plot.background = element_blank(),
653 panel.grid.major = element_blank(),
654 panel.grid.minor = element_blank())+
632 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 655 theme(text=element_text(family="ArialMT", face="bold", size=15))+
633 theme(legend.position="bottom",legend.direction="vertical")+ 656 theme(legend.position="bottom",legend.direction="vertical")+
634 guides(fill=guide_legend(ncol=2,byrow=TRUE)) 657 guides(fill=guide_legend(ncol=2,byrow=TRUE))
635 ## coord_labels = aggregate(cbind(x,y)~correct, data=comparison_df, mean, na.rm=TRUE, na.action="na.pass") 658 ## coord_labels = aggregate(cbind(x,y)~correct, data=comparison_df, mean, na.rm=TRUE, na.action="na.pass")
636 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) 659 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
666 merged_response = merge(msidata_coordinates, new_y_input, by=c("x", "y"), all.x=TRUE) 689 merged_response = merge(msidata_coordinates, new_y_input, by=c("x", "y"), all.x=TRUE)
667 merged_response[is.na(merged_response)] = "NA" 690 merged_response[is.na(merged_response)] = "NA"
668 merged_response = merged_response[order(merged_response\$pixel_index),] 691 merged_response = merged_response[order(merged_response\$pixel_index),]
669 new_y_vector = as.factor(merged_response[,4]) 692 new_y_vector = as.factor(merged_response[,4])
670 prediction = predict(training_data,msidata, newy = new_y_vector) 693 prediction = predict(training_data,msidata, newy = new_y_vector)
694
695 ## Summary table prediction
696 summary_table = summary(prediction)\$accuracy[[names(prediction@resultData)]]
697 summary_table2 = round(as.numeric(summary_table), digits=2)
698 summary_matrix = matrix(summary_table2, nrow=4, ncol=ncol(summary_table))
699 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table
700 summary_table4 = t(summary_table3)
701 summary_table5 = cbind(c(names(prediction@resultData),colnames(summary_table)), summary_table4)
702 plot(0,type='n',axes=FALSE,ann=FALSE)
703 grid.table(summary_table5, rows= NULL)
671 704
672 #else 705 #else
673 prediction = predict(training_data,msidata) 706 prediction = predict(training_data,msidata)
674 #end if 707 #end if
675 708
682 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes) 715 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes)
683 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") 716 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition")
684 predicted_toplabels = topFeatures(prediction, n=Inf) 717 predicted_toplabels = topFeatures(prediction, n=Inf)
685 if (colnames(predicted_toplabels)[4] == "coefficients"){ 718 if (colnames(predicted_toplabels)[4] == "coefficients"){
686 predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5) 719 predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5)
687
688 }else{ 720 }else{
689 predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)} 721 predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)}
722
723 ##predicted classes
724 prediction_df = cbind(coord(prediction)[,1:2], predicted_classes)
725 colnames(prediction_df) = c("x", "y", "predicted_classes")
726
727 #if str($type_cond.classification_type) == "SSC_classifier":
728 ## this seems to work only for SSC, therefore overwrite tables
729 predicted_probabilities = data.frame(prediction\$probabilities[[1]])
730 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes, predicted_probabilities)
731 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition", levels(prediction\$classes[[1]]))
732 ## also image modes are specific to SSC
733 image(prediction, mode="classes", layout=c(1,1), main="Class", col=hue_pal()(length(unique(prediction\$classes[[1]]))))
734 image(prediction, mode="probabilities", layout=c(1,1), main="Class probabilities", col=hue_pal()(length(unique(prediction\$classes[[1]]))))
735
736 #else
737
738 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+
739 geom_tile()+
740 coord_fixed()+
741 ggtitle("Predicted condition for each spectrum")+
742 theme_bw()+
743 theme(
744 plot.background = element_blank(),
745 panel.grid.major = element_blank(),
746 panel.grid.minor = element_blank())+
747 theme(text=element_text(family="ArialMT", face="bold", size=15))+
748 theme(legend.position="bottom", legend.direction="vertical")+
749 guides(fill=guide_legend(ncol=4, byrow=TRUE))
750 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass")
751 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
752 print(prediction_plot)
753 #end if
754
690 write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 755 write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
691 write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 756 write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
692 757
693 ## image with predicted classes 758
694 759
695 prediction_df = cbind(coord(prediction)[,1:2], predicted_classes)
696 colnames(prediction_df) = c("x", "y", "predicted_classes")
697
698 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+
699 geom_tile() +
700 coord_fixed()+
701 ggtitle("Predicted condition for each pixel")+
702 theme_bw()+
703 theme(text=element_text(family="ArialMT", face="bold", size=15))+
704 theme(legend.position="bottom",legend.direction="vertical")+
705 guides(fill=guide_legend(ncol=4,byrow=TRUE))
706 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass")
707 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
708 print(prediction_plot)
709 760
761 #if str($type_cond.new_y_values_cond.new_y_values) == "new_response":
710 ## image with right and wrong classes: 762 ## image with right and wrong classes:
711 763
712 comparison_df = cbind(prediction_df, new_y_vector) 764 comparison_df = cbind(prediction_df, new_y_vector)
713 comparison_df\$correct<- as.factor(ifelse(comparison_df\$predicted_classes==comparison_df\$new_y_vector, T, F)) 765 comparison_df\$correct<- ifelse(comparison_df\$predicted_classes==comparison_df\$new_y_vector, T, F)
766 correctness = round(sum(comparison_df\$correct)/length(comparison_df\$correct)*100,2)
714 767
715 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+ 768 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+
716 geom_tile()+ 769 geom_tile()+
717 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ 770 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+
718 coord_fixed()+ 771 coord_fixed()+
719 ggtitle("Correctness of classification")+ 772 ggtitle(paste0("Correctness of classification: ",correctness, "%"))+
720 theme_bw()+ 773 theme_bw()+
721 theme(text=element_text(family="ArialMT", face="bold", size=15))+ 774 theme(text=element_text(family="ArialMT", face="bold", size=15))+
722 theme(legend.position="bottom",legend.direction="vertical")+ 775 theme(legend.position="bottom",legend.direction="vertical")+
723 guides(fill=guide_legend(ncol=2,byrow=TRUE)) 776 guides(fill=guide_legend(ncol=2,byrow=TRUE))
724 ## coord_labels = aggregate(cbind(x,y)~correct, data=comparison_df, mean, na.rm=TRUE, na.action="na.pass")
725 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
726 print(correctness_plot) 777 print(correctness_plot)
727 778 #end if
728
729 ## Summary table prediction
730 summary_table = summary(prediction)\$accuracy[[names(prediction@resultData)]]
731 summary_table2 = round(as.numeric(summary_table), digits=2)
732 summary_matrix = matrix(summary_table2, nrow=4, ncol=ncol(summary_table))
733 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table
734 summary_table4 = t(summary_table3)
735 summary_table5 = cbind(c(names(prediction@resultData),colnames(summary_table)), summary_table4)
736 plot(0,type='n',axes=FALSE,ann=FALSE)
737 grid.table(summary_table5, rows= NULL)
738 779
739 ## optional output as .RData 780 ## optional output as .RData
740 #if $output_rdata: 781 #if $output_rdata:
741 msidata = prediction 782 msidata = prediction
742 save(msidata, file="$classification_rdata") 783 save(msidata, file="$classification_rdata")
831 <conditional name="ssc_analysis_cond"> 872 <conditional name="ssc_analysis_cond">
832 <param name="ssc_method" type="select" label="Analysis step to perform"> 873 <param name="ssc_method" type="select" label="Analysis step to perform">
833 <option value="ssc_cvapply" selected="True">cvApply</option> 874 <option value="ssc_cvapply" selected="True">cvApply</option>
834 <option value="ssc_analysis">spatial shrunken centroids analysis</option> 875 <option value="ssc_analysis">spatial shrunken centroids analysis</option>
835 </param> 876 </param>
836 <when value="ssc_cvapply"/> 877 <when value="ssc_cvapply">
837 878 <param name="write_best_params" type="boolean" label="Write out best r and s values" help="Can be used to generate automatic classification workflow"/>
879 </when>
838 <when value="ssc_analysis"> 880 <when value="ssc_analysis">
839 <!--param name="ssc_toplabels" type="integer" value="100" 881 <!--param name="ssc_toplabels" type="integer" value="100"
840 label="Number of toplabels (m/z features) which should be written in tabular output"/--> 882 label="Number of toplabels (m/z features) which should be written in tabular output"/-->
841 </when> 883 </when>
842 </conditional> 884 </conditional>
849 <expand macro="sanitizer_multiple_digits"/> 891 <expand macro="sanitizer_multiple_digits"/>
850 </param> 892 </param>
851 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> 893 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights">
852 <option value="gaussian">gaussian</option> 894 <option value="gaussian">gaussian</option>
853 <option value="adaptive" selected="True">adaptive</option> 895 <option value="adaptive" selected="True">adaptive</option>
854 </param> 896 </param>
855
856 </when> 897 </when>
857 </conditional> 898 </conditional>
858 899
859 </when> 900 </when>
860 901
861 <when value="prediction"> 902 <when value="prediction">
862 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> 903 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/>
863 <!--param name="predicted_toplabels" type="integer" value="100" 904 <!--param name="predicted_toplabels" type="integer" value="100"
864 label="Number of toplabels (m/z features) which should be written in tabular output"/--> 905 label="Number of toplabels (m/z features) which should be written in tabular output"/-->
906 <param name="classification_type" type="select" display="radio" optional="False" label="Which classification method was used">
907 <option value="PLS_classifier" selected="True" >PLS classifier</option>
908 <option value="OPLS_classifier">OPLS classifier</option>
909 <option value="SSC_classifier">SSC classifier</option>
910 </param>
865 <conditional name="new_y_values_cond"> 911 <conditional name="new_y_values_cond">
866 <param name="new_y_values" type="select" label="Should new response values be used"> 912 <param name="new_y_values" type="select" label="Load annotations (optional, but allows accuracy calculations)">
867 <option value="no_new_response" selected="True">old response should be used</option> 913 <option value="no_new_response" selected="True">no</option>
868 <option value="new_response">load new response from tabular file</option> 914 <option value="new_response">use annotations</option>
869 </param> 915 </param>
870 <when value="no_new_response"/> 916 <when value="no_new_response"/>
871 <when value="new_response"> 917 <when value="new_response">
872 <param name="new_response_file" type="data" format="tabular" label="Load tabular file with pixel coordinates and the new response"/> 918 <param name="new_response_file" type="data" format="tabular" label="Load tabular file with pixel coordinates and the new response"/>
873 <param name="column_new_x" data_ref="new_response_file" label="Column with x values" type="data_column"/> 919 <param name="column_new_x" data_ref="new_response_file" label="Column with x values" type="data_column"/>
882 </inputs> 928 </inputs>
883 <outputs> 929 <outputs>
884 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}: results"/> 930 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}: results"/>
885 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/> 931 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/>
886 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/> 932 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/>
933 <data format="txt" name="best_r" label="${tool.name} on ${on_string}: best r">
934 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter>
935 </data>
936 <data format="txt" name="best_s" label="${tool.name} on ${on_string}: best s">
937 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter>
938 </data>
887 <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData"> 939 <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData">
888 <filter>output_rdata</filter> 940 <filter>output_rdata</filter>
889 </data> 941 </data>
890 </outputs> 942 </outputs>
891 <tests> 943 <tests>
1099 - O-PLS-DA: Orthogonal partial least squares discriminant analysis 1151 - O-PLS-DA: Orthogonal partial least squares discriminant analysis
1100 - Spatial shrunken centroids (more details in `Bemis et al. <https://doi.org/10.1074/mcp.O115.053918>`_) 1152 - Spatial shrunken centroids (more details in `Bemis et al. <https://doi.org/10.1074/mcp.O115.053918>`_)
1101 - training and prediction 1153 - training and prediction
1102 1154
1103 - training can be done with cvapply that uses cross validation to find the best value for s, this requires not only a condition for each spectrum but also a fold (each fold should contain spectra of all conditions) 1155 - training can be done with cvapply that uses cross validation to find the best value for s, this requires not only a condition for each spectrum but also a fold (each fold should contain spectra of all conditions)
1104 - training with the best value for s gives the top m/z features for each condition and the predicted classification group for each spectrum 1156 - training with the best value for r and s gives the top m/z features for each condition and the predicted classification group for each spectrum
1105 - training result can be saved as RData file that can be reused for prediction of further samples 1157 - training result can be saved as RData file that can be reused for prediction of further samples
1158 - prediction can calculate accuracies when the annotations are known and provided
1106 1159
1107 1160
1108 .. image:: $PATH_TO_IMAGES/classification_overview.png 1161 .. image:: $PATH_TO_IMAGES/classification_overview.png
1109 :width: 1000 1162 :width: 1000
1110 :height: 465 1163 :height: 465