Mercurial > repos > galaxyp > cardinal_classification
comparison classification.xml @ 14:ece627528a78 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 888b3e991d0752b694bf480531ce0e5318c2f337-dirty"
author | galaxyp |
---|---|
date | Fri, 07 May 2021 10:10:35 +0000 |
parents | 24c000517173 |
children | f28ad96b76dc |
comparison
equal
deleted
inserted
replaced
13:b18329a8ac14 | 14:ece627528a78 |
---|---|
1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.0"> | 1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.1"> |
2 <description>spatial classification of mass spectrometry imaging data</description> | 2 <description>spatial classification of mass spectrometry imaging data</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements"> | 6 <expand macro="requirements"> |
23 ################################# load libraries and read file ######################### | 23 ################################# load libraries and read file ######################### |
24 | 24 |
25 library(Cardinal) | 25 library(Cardinal) |
26 library(gridExtra) | 26 library(gridExtra) |
27 library(ggplot2) | 27 library(ggplot2) |
28 | 28 library(scales) |
29 | 29 |
30 @READING_MSIDATA@ | 30 @READING_MSIDATA@ |
31 | 31 |
32 msidata = as(msidata, "MSImageSet") ##coercion to MSImageSet | 32 msidata = as(msidata, "MSImageSet") ##coercion to MSImageSet |
33 | 33 |
55 | 55 |
56 title(main=paste0(Title," for file: \n\n", "$infile.display_name")) | 56 title(main=paste0(Title," for file: \n\n", "$infile.display_name")) |
57 | 57 |
58 | 58 |
59 ##################### I) numbers and control plots ############################# | 59 ##################### I) numbers and control plots ############################# |
60 ############################################################################### | 60 ################################################################################ |
61 | 61 |
62 ## table with values | 62 ## table with values |
63 grid.table(property_df, rows= NULL) | 63 grid.table(property_df, rows= NULL) |
64 | 64 |
65 | 65 |
66 if (npeaks > 0 && sum(is.na(spectra(msidata)))==0){ | 66 if (npeaks > 0 && sum(is.na(spectra(msidata)))==0){ |
67 | 67 |
68 opar <- par() | 68 opar <- par() |
69 | 69 |
70 ######################## II) Training ############################# | 70 ######################## II) Training ####################################### |
71 ############################################################################# | 71 ############################################################################# |
72 #if str( $type_cond.type_method) == "training": | 72 #if str( $type_cond.type_method) == "training": |
73 print("training") | 73 print("training") |
74 | 74 |
75 | 75 |
88 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) | 88 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) |
89 colnames(msidata_coordinates)[3] = "pixel_index" | 89 colnames(msidata_coordinates)[3] = "pixel_index" |
90 merged_response = merge(msidata_coordinates, y_input, by=c("x", "y"), all.x=TRUE) | 90 merged_response = merge(msidata_coordinates, y_input, by=c("x", "y"), all.x=TRUE) |
91 merged_response[is.na(merged_response)] = "NA" | 91 merged_response[is.na(merged_response)] = "NA" |
92 merged_response = merged_response[order(merged_response\$pixel_index),] | 92 merged_response = merged_response[order(merged_response\$pixel_index),] |
93 y_vector = as.factor(merged_response[,4]) | 93 conditions = as.factor(merged_response[,4]) |
94 y_vector = conditions | |
94 | 95 |
95 ## plot of y vector | 96 ## plot of y vector |
96 | 97 |
97 position_df = cbind(coord(msidata)[,1:2], y_vector) | 98 position_df = cbind(coord(msidata)[,1:2], conditions) |
98 y_plot = ggplot(position_df, aes(x=x, y=y, fill=y_vector))+ | 99 y_plot = ggplot(position_df, aes(x=x, y=y, fill=conditions))+ |
99 geom_tile() + | 100 geom_tile() + |
100 coord_fixed()+ | 101 coord_fixed()+ |
101 ggtitle("Distribution of the response variable y")+ | 102 ggtitle("Distribution of the conditions")+ |
102 theme_bw()+ | 103 theme_bw()+ |
104 theme( | |
105 plot.background = element_blank(), | |
106 panel.grid.major = element_blank(), | |
107 panel.grid.minor = element_blank())+ | |
103 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 108 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
104 theme(legend.position="bottom",legend.direction="vertical")+ | 109 theme(legend.position="bottom",legend.direction="vertical")+ |
105 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | 110 guides(fill=guide_legend(ncol=4,byrow=TRUE)) |
106 coord_labels = aggregate(cbind(x,y)~y_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | 111 coord_labels = aggregate(cbind(x,y)~conditions, data=position_df, mean, na.rm=TRUE, na.action="na.pass") |
107 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$y_vector) | 112 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$conditions) |
108 print(y_plot) | 113 print(y_plot) |
109 | 114 |
110 | 115 |
111 ## plot of folds | 116 ## plot of folds |
112 | 117 |
117 position_df = cbind(coord(msidata)[,1:2], fold_vector) | 122 position_df = cbind(coord(msidata)[,1:2], fold_vector) |
118 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ | 123 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ |
119 geom_tile() + | 124 geom_tile() + |
120 coord_fixed()+ | 125 coord_fixed()+ |
121 ggtitle("Distribution of the fold variable")+ | 126 ggtitle("Distribution of the fold variable")+ |
122 theme_bw()+ | 127 theme_bw()+ |
128 theme( | |
129 plot.background = element_blank(), | |
130 panel.grid.major = element_blank(), | |
131 panel.grid.minor = element_blank())+ | |
123 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 132 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
124 theme(legend.position="bottom",legend.direction="vertical")+ | 133 theme(legend.position="bottom",legend.direction="vertical")+ |
125 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | 134 guides(fill=guide_legend(ncol=4,byrow=TRUE)) |
126 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | 135 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") |
127 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) | 136 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) |
274 | 283 |
275 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | 284 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ |
276 geom_tile() + | 285 geom_tile() + |
277 coord_fixed()+ | 286 coord_fixed()+ |
278 ggtitle("Predicted condition for each pixel")+ | 287 ggtitle("Predicted condition for each pixel")+ |
279 theme_bw()+ | 288 theme_bw()+ |
289 theme( | |
290 plot.background = element_blank(), | |
291 panel.grid.major = element_blank(), | |
292 panel.grid.minor = element_blank())+ | |
280 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 293 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
281 theme(legend.position="bottom",legend.direction="vertical")+ | 294 theme(legend.position="bottom",legend.direction="vertical")+ |
282 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | 295 guides(fill=guide_legend(ncol=4,byrow=TRUE)) |
283 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | 296 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") |
284 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | 297 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) |
441 | 454 |
442 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | 455 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ |
443 geom_tile() + | 456 geom_tile() + |
444 coord_fixed()+ | 457 coord_fixed()+ |
445 ggtitle("Predicted condition for each pixel")+ | 458 ggtitle("Predicted condition for each pixel")+ |
446 theme_bw()+ | 459 theme_bw()+ |
460 theme( | |
461 plot.background = element_blank(), | |
462 panel.grid.major = element_blank(), | |
463 panel.grid.minor = element_blank())+ | |
447 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 464 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
448 theme(legend.position="bottom",legend.direction="vertical")+ | 465 theme(legend.position="bottom",legend.direction="vertical")+ |
449 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | 466 guides(fill=guide_legend(ncol=4,byrow=TRUE)) |
450 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | 467 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") |
451 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | 468 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) |
505 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space | 522 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space |
506 minimumy = min(coord(msidata.cv.ssc)[,2]) | 523 minimumy = min(coord(msidata.cv.ssc)[,2]) |
507 maximumy = max(coord(msidata.cv.ssc)[,2]) | 524 maximumy = max(coord(msidata.cv.ssc)[,2]) |
508 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(1,1)) | 525 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(1,1)) |
509 | 526 |
527 #if $type_cond.method_cond.ssc_analysis_cond.write_best_params: | |
528 write.table(r_value, file="$best_r", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | |
529 write.table(s_value, file="$best_s", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | |
530 #end if | |
531 | |
510 ## print table with summary in pdf | 532 ## print table with summary in pdf |
511 par(opar) | 533 par(opar) |
512 plot(0,type='n',axes=FALSE,ann=FALSE) | 534 plot(0,type='n',axes=FALSE,ann=FALSE) |
513 title(main="Summary for the different parameters\n", adj=0.5) | 535 title(main="Summary for the different parameters\n", adj=0.5) |
514 ## 20 rows fits in one page: | 536 ## 20 rows fits in one page: |
540 | 562 |
541 ## set variables for components and number of response groups | 563 ## set variables for components and number of response groups |
542 number_groups = length(levels(y_vector)) | 564 number_groups = length(levels(y_vector)) |
543 | 565 |
544 ## SSC analysis and plot | 566 ## SSC analysis and plot |
545 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, .fold = fold_vector, | 567 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, |
546 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") | 568 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") |
547 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s))) | 569 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s)), |
570 col=hue_pal()(length(levels(msidata.ssc\$classes[[1]]))), lwd=2) | |
571 | |
572 | |
548 | 573 |
549 ### summary table SSC | 574 ### summary table SSC |
550 ##############summary_table = summary(msidata.ssc) | 575 ##############summary_table = summary(msidata.ssc) |
551 | 576 |
552 ### stop if multiple values for r and s were used as input | 577 ### stop if multiple values for r and s were used as input |
580 maximumy = max(coord(msidata)[,2]) | 605 maximumy = max(coord(msidata)[,2]) |
581 print(image(msidata, mz = topFeatures(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) | 606 print(image(msidata, mz = topFeatures(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) |
582 | 607 |
583 ## m/z and pixel information output | 608 ## m/z and pixel information output |
584 ssc_classes = data.frame(msidata.ssc\$classes[[1]]) | 609 ssc_classes = data.frame(msidata.ssc\$classes[[1]]) |
610 ssc_probabilities = data.frame(msidata.ssc\$probabilities[[1]]) | |
585 | 611 |
586 ## pixel names and coordinates | 612 ## pixel names and coordinates |
587 ## to remove potential sample names and z dimension, split at comma and take only x and y | 613 ## to remove potential sample names and z dimension, split at comma and take only x and y |
588 x_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 1)) | 614 x_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 1)) |
589 y_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 2)) | 615 y_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 2)) |
594 | 620 |
595 ## remove msidata to clean up RAM space | 621 ## remove msidata to clean up RAM space |
596 rm(msidata) | 622 rm(msidata) |
597 gc() | 623 gc() |
598 | 624 |
599 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes) | 625 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes, ssc_probabilities) |
600 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition") | 626 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition", levels(msidata.ssc\$classes[[1]])) |
601 ssc_toplabels = topFeatures(msidata.ssc, n=Inf) | 627 ssc_toplabels = topFeatures(msidata.ssc, n=Inf) |
602 ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6) | 628 ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6) |
603 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 629 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
604 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 630 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
605 | 631 |
606 ## image with predicted classes | 632 image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), |
607 prediction_df = cbind(coord(msidata.ssc)[,1:2], ssc_classes) | 633 col=hue_pal()(length(levels(msidata.ssc\$classes[[1]]))), mode="classes", layout=c(1,1), main="Class Prediction") |
608 colnames(prediction_df) = c("x", "y", "predicted_classes") | 634 image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), |
609 | 635 col=hue_pal()(length(levels(msidata.ssc\$classes[[1]]))), mode="probabilities", layout=c(1,1), main="Class probabilities") |
610 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | 636 |
611 geom_tile() + | |
612 coord_fixed()+ | |
613 ggtitle("Predicted condition for each pixel")+ | |
614 theme_bw()+ | |
615 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
616 theme(legend.position="bottom",legend.direction="vertical")+ | |
617 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
618 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | |
619 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | |
620 print(prediction_plot) | |
621 | |
622 | 637 |
623 ## image with right and wrong classes: | 638 ## image with right and wrong classes: |
639 prediction_df = cbind(coord(msidata.ssc)[,1:2], ssc_classes) | |
640 colnames(prediction_df) = c("x", "y", "predicted_classes") | |
624 comparison_df = cbind(prediction_df, y_vector) | 641 comparison_df = cbind(prediction_df, y_vector) |
625 comparison_df\$correct<- ifelse(comparison_df\$predicted_classes==comparison_df\$y_vector, T, F) | 642 comparison_df\$correct<- ifelse(comparison_df\$predicted_classes==comparison_df\$y_vector, T, F) |
643 correctness = round(sum(comparison_df\$correct)/length(comparison_df\$correct)*100,2) | |
626 | 644 |
627 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+ | 645 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+ |
628 geom_tile() + | 646 geom_tile() + |
629 coord_fixed()+ | 647 coord_fixed()+ |
630 ggtitle("Correctness of classification")+ | 648 ggtitle(paste0("Correctness of classification: ",correctness, "%"))+ |
631 theme_bw()+ | 649 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ |
650 theme_bw()+ | |
651 theme( | |
652 plot.background = element_blank(), | |
653 panel.grid.major = element_blank(), | |
654 panel.grid.minor = element_blank())+ | |
632 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 655 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
633 theme(legend.position="bottom",legend.direction="vertical")+ | 656 theme(legend.position="bottom",legend.direction="vertical")+ |
634 guides(fill=guide_legend(ncol=2,byrow=TRUE)) | 657 guides(fill=guide_legend(ncol=2,byrow=TRUE)) |
635 ## coord_labels = aggregate(cbind(x,y)~correct, data=comparison_df, mean, na.rm=TRUE, na.action="na.pass") | 658 ## coord_labels = aggregate(cbind(x,y)~correct, data=comparison_df, mean, na.rm=TRUE, na.action="na.pass") |
636 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | 659 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) |
666 merged_response = merge(msidata_coordinates, new_y_input, by=c("x", "y"), all.x=TRUE) | 689 merged_response = merge(msidata_coordinates, new_y_input, by=c("x", "y"), all.x=TRUE) |
667 merged_response[is.na(merged_response)] = "NA" | 690 merged_response[is.na(merged_response)] = "NA" |
668 merged_response = merged_response[order(merged_response\$pixel_index),] | 691 merged_response = merged_response[order(merged_response\$pixel_index),] |
669 new_y_vector = as.factor(merged_response[,4]) | 692 new_y_vector = as.factor(merged_response[,4]) |
670 prediction = predict(training_data,msidata, newy = new_y_vector) | 693 prediction = predict(training_data,msidata, newy = new_y_vector) |
694 | |
695 ## Summary table prediction | |
696 summary_table = summary(prediction)\$accuracy[[names(prediction@resultData)]] | |
697 summary_table2 = round(as.numeric(summary_table), digits=2) | |
698 summary_matrix = matrix(summary_table2, nrow=4, ncol=ncol(summary_table)) | |
699 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | |
700 summary_table4 = t(summary_table3) | |
701 summary_table5 = cbind(c(names(prediction@resultData),colnames(summary_table)), summary_table4) | |
702 plot(0,type='n',axes=FALSE,ann=FALSE) | |
703 grid.table(summary_table5, rows= NULL) | |
671 | 704 |
672 #else | 705 #else |
673 prediction = predict(training_data,msidata) | 706 prediction = predict(training_data,msidata) |
674 #end if | 707 #end if |
675 | 708 |
682 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes) | 715 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes) |
683 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") | 716 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") |
684 predicted_toplabels = topFeatures(prediction, n=Inf) | 717 predicted_toplabels = topFeatures(prediction, n=Inf) |
685 if (colnames(predicted_toplabels)[4] == "coefficients"){ | 718 if (colnames(predicted_toplabels)[4] == "coefficients"){ |
686 predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5) | 719 predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5) |
687 | |
688 }else{ | 720 }else{ |
689 predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)} | 721 predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)} |
722 | |
723 ##predicted classes | |
724 prediction_df = cbind(coord(prediction)[,1:2], predicted_classes) | |
725 colnames(prediction_df) = c("x", "y", "predicted_classes") | |
726 | |
727 #if str($type_cond.classification_type) == "SSC_classifier": | |
728 ## this seems to work only for SSC, therefore overwrite tables | |
729 predicted_probabilities = data.frame(prediction\$probabilities[[1]]) | |
730 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes, predicted_probabilities) | |
731 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition", levels(prediction\$classes[[1]])) | |
732 ## also image modes are specific to SSC | |
733 image(prediction, mode="classes", layout=c(1,1), main="Class", col=hue_pal()(length(unique(prediction\$classes[[1]])))) | |
734 image(prediction, mode="probabilities", layout=c(1,1), main="Class probabilities", col=hue_pal()(length(unique(prediction\$classes[[1]])))) | |
735 | |
736 #else | |
737 | |
738 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | |
739 geom_tile()+ | |
740 coord_fixed()+ | |
741 ggtitle("Predicted condition for each spectrum")+ | |
742 theme_bw()+ | |
743 theme( | |
744 plot.background = element_blank(), | |
745 panel.grid.major = element_blank(), | |
746 panel.grid.minor = element_blank())+ | |
747 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
748 theme(legend.position="bottom", legend.direction="vertical")+ | |
749 guides(fill=guide_legend(ncol=4, byrow=TRUE)) | |
750 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | |
751 coord_labels\$file_number = gsub( "_.*§", "", coord_labels\$predicted_classes) | |
752 print(prediction_plot) | |
753 #end if | |
754 | |
690 write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 755 write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
691 write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 756 write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
692 | 757 |
693 ## image with predicted classes | 758 |
694 | 759 |
695 prediction_df = cbind(coord(prediction)[,1:2], predicted_classes) | |
696 colnames(prediction_df) = c("x", "y", "predicted_classes") | |
697 | |
698 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | |
699 geom_tile() + | |
700 coord_fixed()+ | |
701 ggtitle("Predicted condition for each pixel")+ | |
702 theme_bw()+ | |
703 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
704 theme(legend.position="bottom",legend.direction="vertical")+ | |
705 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
706 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | |
707 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | |
708 print(prediction_plot) | |
709 | 760 |
761 #if str($type_cond.new_y_values_cond.new_y_values) == "new_response": | |
710 ## image with right and wrong classes: | 762 ## image with right and wrong classes: |
711 | 763 |
712 comparison_df = cbind(prediction_df, new_y_vector) | 764 comparison_df = cbind(prediction_df, new_y_vector) |
713 comparison_df\$correct<- as.factor(ifelse(comparison_df\$predicted_classes==comparison_df\$new_y_vector, T, F)) | 765 comparison_df\$correct<- ifelse(comparison_df\$predicted_classes==comparison_df\$new_y_vector, T, F) |
766 correctness = round(sum(comparison_df\$correct)/length(comparison_df\$correct)*100,2) | |
714 | 767 |
715 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+ | 768 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+ |
716 geom_tile()+ | 769 geom_tile()+ |
717 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ | 770 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ |
718 coord_fixed()+ | 771 coord_fixed()+ |
719 ggtitle("Correctness of classification")+ | 772 ggtitle(paste0("Correctness of classification: ",correctness, "%"))+ |
720 theme_bw()+ | 773 theme_bw()+ |
721 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 774 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
722 theme(legend.position="bottom",legend.direction="vertical")+ | 775 theme(legend.position="bottom",legend.direction="vertical")+ |
723 guides(fill=guide_legend(ncol=2,byrow=TRUE)) | 776 guides(fill=guide_legend(ncol=2,byrow=TRUE)) |
724 ## coord_labels = aggregate(cbind(x,y)~correct, data=comparison_df, mean, na.rm=TRUE, na.action="na.pass") | |
725 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | |
726 print(correctness_plot) | 777 print(correctness_plot) |
727 | 778 #end if |
728 | |
729 ## Summary table prediction | |
730 summary_table = summary(prediction)\$accuracy[[names(prediction@resultData)]] | |
731 summary_table2 = round(as.numeric(summary_table), digits=2) | |
732 summary_matrix = matrix(summary_table2, nrow=4, ncol=ncol(summary_table)) | |
733 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | |
734 summary_table4 = t(summary_table3) | |
735 summary_table5 = cbind(c(names(prediction@resultData),colnames(summary_table)), summary_table4) | |
736 plot(0,type='n',axes=FALSE,ann=FALSE) | |
737 grid.table(summary_table5, rows= NULL) | |
738 | 779 |
739 ## optional output as .RData | 780 ## optional output as .RData |
740 #if $output_rdata: | 781 #if $output_rdata: |
741 msidata = prediction | 782 msidata = prediction |
742 save(msidata, file="$classification_rdata") | 783 save(msidata, file="$classification_rdata") |
831 <conditional name="ssc_analysis_cond"> | 872 <conditional name="ssc_analysis_cond"> |
832 <param name="ssc_method" type="select" label="Analysis step to perform"> | 873 <param name="ssc_method" type="select" label="Analysis step to perform"> |
833 <option value="ssc_cvapply" selected="True">cvApply</option> | 874 <option value="ssc_cvapply" selected="True">cvApply</option> |
834 <option value="ssc_analysis">spatial shrunken centroids analysis</option> | 875 <option value="ssc_analysis">spatial shrunken centroids analysis</option> |
835 </param> | 876 </param> |
836 <when value="ssc_cvapply"/> | 877 <when value="ssc_cvapply"> |
837 | 878 <param name="write_best_params" type="boolean" label="Write out best r and s values" help="Can be used to generate automatic classification workflow"/> |
879 </when> | |
838 <when value="ssc_analysis"> | 880 <when value="ssc_analysis"> |
839 <!--param name="ssc_toplabels" type="integer" value="100" | 881 <!--param name="ssc_toplabels" type="integer" value="100" |
840 label="Number of toplabels (m/z features) which should be written in tabular output"/--> | 882 label="Number of toplabels (m/z features) which should be written in tabular output"/--> |
841 </when> | 883 </when> |
842 </conditional> | 884 </conditional> |
849 <expand macro="sanitizer_multiple_digits"/> | 891 <expand macro="sanitizer_multiple_digits"/> |
850 </param> | 892 </param> |
851 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> | 893 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> |
852 <option value="gaussian">gaussian</option> | 894 <option value="gaussian">gaussian</option> |
853 <option value="adaptive" selected="True">adaptive</option> | 895 <option value="adaptive" selected="True">adaptive</option> |
854 </param> | 896 </param> |
855 | |
856 </when> | 897 </when> |
857 </conditional> | 898 </conditional> |
858 | 899 |
859 </when> | 900 </when> |
860 | 901 |
861 <when value="prediction"> | 902 <when value="prediction"> |
862 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> | 903 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> |
863 <!--param name="predicted_toplabels" type="integer" value="100" | 904 <!--param name="predicted_toplabels" type="integer" value="100" |
864 label="Number of toplabels (m/z features) which should be written in tabular output"/--> | 905 label="Number of toplabels (m/z features) which should be written in tabular output"/--> |
906 <param name="classification_type" type="select" display="radio" optional="False" label="Which classification method was used"> | |
907 <option value="PLS_classifier" selected="True" >PLS classifier</option> | |
908 <option value="OPLS_classifier">OPLS classifier</option> | |
909 <option value="SSC_classifier">SSC_classifier</option> | |
910 </param> | |
865 <conditional name="new_y_values_cond"> | 911 <conditional name="new_y_values_cond"> |
866 <param name="new_y_values" type="select" label="Should new response values be used"> | 912 <param name="new_y_values" type="select" label="Load annotations (optional, but allows accuracy calculations)"> |
867 <option value="no_new_response" selected="True">old response should be used</option> | 913 <option value="no_new_response" selected="True">no</option> |
868 <option value="new_response">load new response from tabular file</option> | 914 <option value="new_response">use annotations</option> |
869 </param> | 915 </param> |
870 <when value="no_new_response"/> | 916 <when value="no_new_response"/> |
871 <when value="new_response"> | 917 <when value="new_response"> |
872 <param name="new_response_file" type="data" format="tabular" label="Load tabular file with pixel coordinates and the new response"/> | 918 <param name="new_response_file" type="data" format="tabular" label="Load tabular file with pixel coordinates and the new response"/> |
873 <param name="column_new_x" data_ref="new_response_file" label="Column with x values" type="data_column"/> | 919 <param name="column_new_x" data_ref="new_response_file" label="Column with x values" type="data_column"/> |
882 </inputs> | 928 </inputs> |
883 <outputs> | 929 <outputs> |
884 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}: results"/> | 930 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}: results"/> |
885 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/> | 931 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/> |
886 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/> | 932 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/> |
933 <data format="txt" name="best_r" label="${tool.name} on ${on_string}:best r"> | |
934 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter> | |
935 </data> | |
936 <data format="txt" name="best_s" label="${tool.name} on ${on_string}:best s"> | |
937 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter> | |
938 </data> | |
887 <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData"> | 939 <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData"> |
888 <filter>output_rdata</filter> | 940 <filter>output_rdata</filter> |
889 </data> | 941 </data> |
890 </outputs> | 942 </outputs> |
891 <tests> | 943 <tests> |
1099 - O-PLS-DA: Orthogonal partial least squares discriminant analysis | 1151 - O-PLS-DA: Orthogonal partial least squares discriminant analysis |
1100 - Spatial shrunken centroids (more details in `Bemis et al. <https://doi.org/10.1074/mcp.O115.053918>`_) | 1152 - Spatial shrunken centroids (more details in `Bemis et al. <https://doi.org/10.1074/mcp.O115.053918>`_) |
1101 - training and prediction | 1153 - training and prediction |
1102 | 1154 |
1103 - training can be done with cvapply that uses cross validation to find the best value for s, this requires not only a condition for each spectrum but also a fold (each fold should contain spectra of all conditions) | 1155 - training can be done with cvapply that uses cross validation to find the best value for s, this requires not only a condition for each spectrum but also a fold (each fold should contain spectra of all conditions) |
1104 - training with the best value for s gives the top m/z features for each condition and the predicted classification group for each spectrum | 1156 - training with the best value for r and s gives the top m/z features for each condition and the predicted classification group for each spectrum |
1105 - training result can be saved as RData file that can be reused for prediction of further samples | 1157 - training result can be saved as RData file that can be reused for prediction of further samples |
1158 - prediction can calculate accuracies when the annotations are known and provided | |
1106 | 1159 |
1107 | 1160 |
1108 .. image:: $PATH_TO_IMAGES/classification_overview.png | 1161 .. image:: $PATH_TO_IMAGES/classification_overview.png |
1109 :width: 1000 | 1162 :width: 1000 |
1110 :height: 465 | 1163 :height: 465 |