comparison quality_report.xml @ 9:0d4d4f16d455 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit d008f6ea0f5c8435fb975a34cb99ea4d42c5ebd2"
author galaxyp
date Wed, 13 May 2020 14:15:15 -0400
parents bb9500286fe4
children f365bad862c9
comparison
equal deleted inserted replaced
8:bb9500286fe4 9:0d4d4f16d455
1 <tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.0"> 1 <tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.1">
2 <description> 2 <description>
3 mass spectrometry imaging QC 3 mass spectrometry imaging QC
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>macros.xml</import> 6 <import>macros.xml</import>
38 ## in case RData input is MSImageSet: 38 ## in case RData input is MSImageSet:
39 if (class(msidata) == "MSImageSet"){ 39 if (class(msidata) == "MSImageSet"){
40 msidata = as(msidata, "MSImagingExperiment") 40 msidata = as(msidata, "MSImagingExperiment")
41 run(msidata) = "infile" 41 run(msidata) = "infile"
42 } 42 }
43 print(class(msidata)) 43
44 ## remove duplicated coordinates 44 ## remove duplicated coordinates
45 msidata <- msidata[,!duplicated(coord(msidata))] 45 msidata <- msidata[,!duplicated(coord(msidata))]
46 46
47 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample) 47 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample)
48 48
72 ## Spectra multiplied with m/z (potential number of peaks) 72 ## Spectra multiplied with m/z (potential number of peaks)
73 numpeaks = ncol(msidata)*nrow(msidata) 73 numpeaks = ncol(msidata)*nrow(msidata)
74 ## Percentage of intensities > 0 74 ## Percentage of intensities > 0
75 percpeaks = round(npeaks/numpeaks*100, digits=2) 75 percpeaks = round(npeaks/numpeaks*100, digits=2)
76 ## Number of empty TICs 76 ## Number of empty TICs
77 TICs = colSums(spectra(msidata), na.rm=TRUE) 77 TICs = pixelApply(msidata, sum)
78 NumemptyTIC = sum(TICs == 0) 78 NumemptyTIC = sum(TICs == 0)
79 ## Median and sd TIC 79 ## Median and sd TIC
80 medTIC = round(median(TICs), digits=1) 80 medTIC = round(median(TICs), digits=1)
81 sdTIC = round(sd(TICs), digits=0) 81 sdTIC = round(sd(TICs), digits=0)
82 ## Median and sd # peaks per spectrum 82 ## Median and sd # peaks per spectrum
327 327
328 ### filter spectra for max m/z to have two vectors, which can be divided 328 ### filter spectra for max m/z to have two vectors, which can be divided
329 ### plot spatial distribution of fold change 329 ### plot spatial distribution of fold change
330 330
331 ## calculate mean intensity for each m/z over the ppm range; then calculate log2 foldchange 331 ## calculate mean intensity for each m/z over the ppm range; then calculate log2 foldchange
332 mass1vector = colMeans(spectra(filtered_data1), na.rm =TRUE) 332 mass1vector = pixelApply(filtered_data1, mean, na.rm =TRUE)
333 mass2vector = colMeans(spectra(filtered_data2), na.rm = TRUE) 333 mass2vector = pixelApply(filtered_data2, mean, na.rm = TRUE)
334 foldchange= log2(mass1vector/mass2vector) 334 foldchange= log2(mass1vector/mass2vector)
335 fcmatrix = data.frame(coord(msidata)\$x, coord(msidata)\$y,foldchange) 335 fcmatrix = data.frame(coord(msidata)\$x, coord(msidata)\$y,foldchange)
336 colnames(fcmatrix) = c("x", "y", "foldchange") 336 colnames(fcmatrix) = c("x", "y", "foldchange")
337 337
338 print(ggplot(fcmatrix, aes(x=x, y=y, fill=foldchange))+ 338 print(ggplot(fcmatrix, aes(x=x, y=y, fill=foldchange))+
608 abline(v=median(peakspermz), col="blue") 608 abline(v=median(peakspermz), col="blue")
609 609
610 ########################## 13) Sum of intensities per m/z ################## 610 ########################## 13) Sum of intensities per m/z ##################
611 611
612 ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel) 612 ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel)
613 mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z 613 mzTIC = featureApply(msidata, sum, na.rm=TRUE) ## calculate intensity sum for each m/z
614 614
615 par(mfrow = c(2,1), mar=c(5,6,4,2)) 615 par(mfrow = c(2,1), mar=c(5,6,4,2))
616 ## 13a) scatterplot 616 ## 13a) scatterplot
617 plot_colorByDensity(mz(msidata),mzTIC, main= "Sum of intensities per m/z", ylab ="") 617 plot_colorByDensity(mz(msidata),mzTIC, main= "Sum of intensities per m/z", ylab ="")
618 title(xlab="m/z", line=2.5) 618 title(xlab="m/z", line=2.5)
631 ########################## 14) Intensity distribution ###################### 631 ########################## 14) Intensity distribution ######################
632 632
633 par(mfrow = c(2,1), mar=c(5,6,4,2)) 633 par(mfrow = c(2,1), mar=c(5,6,4,2))
634 634
635 ## 14a) Median intensity over spectra 635 ## 14a) Median intensity over spectra
636 medianint_spectra = apply(spectra(msidata), 2, median, na.rm=TRUE) 636 medianint_spectra = pixelApply(msidata, median)
637 plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index", ylab="") 637 plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index", ylab="")
638 title(ylab="Median spectrum intensity", line=4) 638 title(ylab="Median spectrum intensity", line=4)
639 if (!is.null(levels(msidata\$annotation))){ 639 if (!is.null(levels(msidata\$annotation))){
640 abline(v=abline_vector, lty = 3)} 640 abline(v=abline_vector, lty = 3)}
641 641
680 mean_matrix = cbind(mean_matrix, mean_mz_sample)} 680 mean_matrix = cbind(mean_matrix, mean_mz_sample)}
681 681
682 boxplot(log10(mean_matrix), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n") 682 boxplot(log10(mean_matrix), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n")
683 (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2)) 683 (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2))
684 684
685 ## 14e) Heatmap of pearson correlation on mean intensities between annotation groups 685 ## 14e) Heatmap of mean intensities of annotation groups
686 686
687 corr_matrix = mean_matrix 687 colnames(mean_matrix) = levels(msidata\$annotation)
688 corr_matrix[corr_matrix == 0] <- NA 688 mean_matrix[is.na(mean_matrix)] = 0
689 colnames(corr_matrix) = levels(msidata\$annotation) 689 heatmap.parameters <- list(mean_matrix,
690
691 ## pearson correlation is only possible if there are at least 2 groups
692 if (length(colnames)>1)
693 {
694 corr_matrix = cor(corr_matrix, method= "pearson",use="complete.obs")
695
696 heatmap.parameters <- list(corr_matrix,
697 show_rownames = T, show_colnames = T, 690 show_rownames = T, show_colnames = T,
698 main = "Pearson correlation on mean intensities") 691 main = "Heatmap of mean intensities per annotation group")
699 do.call("pheatmap", heatmap.parameters) 692 par(oma=c(3,0,0,0))
700 } 693 print(heatmap(mean_matrix),margins = c(10, 10))
694
695
696 ## 14f) PCA of mean intensities of annotation groups
697
698 ## define annotation by colour
699 annotation_colour = rainbow(length(levels(msidata\$annotation)))[as.factor(levels(msidata\$annotation))]
700 ## transform and scale dataframe
701 pca = prcomp(t(mean_matrix),center=FALSE,scale.=FALSE)
702 ## plot single plot
703 plot(pca\$x[,c(1,2)],col=annotation_colour,pch=19)
704 ## plot pca with colours for max first 5 PCs
705 pc_comp = ifelse(ncol(pca\$x)<5 , ncol(pca\$x), 5)
706 pairs(pca\$x[,1:pc_comp],col=annotation_colour,pch=19)
707 legend("bottom", horiz = TRUE, legend=levels(msidata\$annotation), col=rainbow(length(levels(msidata\$annotation))), pch=19)
708
701 } 709 }
702 710
703 ################################## VI) Mass spectra and m/z accuracy ######################## 711 ################################## VI) Mass spectra and m/z accuracy ########################
704 ############################################################################ 712 ############################################################################
705 print("Mass spectra and m/z accuracy") 713 print("Mass spectra and m/z accuracy")
708 716
709 ## replace any NA with 0, otherwise plot function will not work at all 717 ## replace any NA with 0, otherwise plot function will not work at all
710 msidata_no_NA = msidata 718 msidata_no_NA = msidata
711 719
712 ## find three equal m/z ranges for the average mass spectra plots: 720 ## find three equal m/z ranges for the average mass spectra plots:
713 third_mz_range = nrow(msidata_no_NA)/3 721 third_mz_range = round(nrow(msidata_no_NA)/3,0)
714 722
715 par(cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) 723 par(cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
716 print(plot(msidata_no_NA, run="infile", layout=c(2,2), strip=FALSE, main= "Average spectrum")) 724 print(plot(msidata_no_NA, run="infile", layout=c(2,2), strip=FALSE, main= "Average spectrum"))
717 print(plot(msidata_no_NA[1:third_mz_range,], run="infile", layout=FALSE, strip=FALSE, main= "Zoomed average spectrum")) 725 print(plot(msidata_no_NA[1:third_mz_range,], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum"))
718 print(plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], run="infile", layout=FALSE, strip=FALSE, main= "Zoomed average spectrum")) 726 print(plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum"))
719 print(plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], run="infile", layout=FALSE, strip=FALSE, main= "Zoomed average spectrum")) 727 print(plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum"))
720 728
721 ## plot one average mass spectrum for each pixel annotation group 729 ## plot one average mass spectrum for each pixel annotation group
722 730
723 if (!is.null(levels(msidata\$annotation))){ 731 if (!is.null(levels(msidata\$annotation))){
724 ## print legend only for less than 10 samples 732 ## print legend only for less than 10 samples
760 768
761 ### find m/z with the highest mean intensity in m/z range (red line in plot 16) and calculate ppm difference for plot 17 769 ### find m/z with the highest mean intensity in m/z range (red line in plot 16) and calculate ppm difference for plot 17
762 filtered_data = msidata_no_NA[mz(msidata_no_NA) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata_no_NA) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] 770 filtered_data = msidata_no_NA[mz(msidata_no_NA) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata_no_NA) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
763 771
764 if (nrow(filtered_data) > 0 & sum(spectra(filtered_data)) > 0){ 772 if (nrow(filtered_data) > 0 & sum(spectra(filtered_data)) > 0){
765 maxmassrow = rowMeans(spectra(filtered_data)) ## for each m/z average intensity is calculated 773 maxmassrow = featureApply(filtered_data, mean) ## for each m/z average intensity is calculated
766 maxvalue = mz(filtered_data)[which.max(maxmassrow)] ### m/z with highest average intensity in m/z range 774 maxvalue = mz(filtered_data)[which.max(maxmassrow)] ### m/z with highest average intensity in m/z range
767 mzdifference = maxvalue - inputcalibrantmasses[mass] ### difference: m/z with highest average intensity - theoretical value 775 mzdifference = maxvalue - inputcalibrantmasses[mass] ### difference: m/z with highest average intensity - theoretical value
768 ppmdifference = mzdifference/inputcalibrantmasses[mass]*1000000 ### calculate ppm for accuracy measurement 776 ppmdifference = mzdifference/inputcalibrantmasses[mass]*1000000 ### calculate ppm for accuracy measurement
769 }else{ 777 }else{
770 ppmdifference = NA 778 ppmdifference = NA
1113 1121
1114 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups. 1122 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups.
1115 - Histogram of intensities. 1123 - Histogram of intensities.
1116 - (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group. 1124 - (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group.
1117 - (annot) Log10 mean intensities per m/z and annotation group: For all pixels of an annotation group the log10 mean intensity for each m/z is calculated and shown as boxplot. 1125 - (annot) Log10 mean intensities per m/z and annotation group: For all pixels of an annotation group the log10 mean intensity for each m/z is calculated and shown as boxplot.
1118 - (annot) Pearson correlation between annotation groups (needs at least 2 groups) based on mean intensities and shown as heatmap. 1126 - (annot) Heatmap of mean intensity per m/z
1127 - (annot) PCA of mean intensity per m/z
1119 1128
1120 **Mass spectra and m/z accuracy** 1129 **Mass spectra and m/z accuracy**
1121 1130
1122 - Average mass spectra: First plot shows the average spectrum over the full m/z range, the other three plots zoom into the m/z axis. 1131 - Average mass spectra: First plot shows the average spectrum over the full m/z range, the other three plots zoom into the m/z axis.
1123 - (annot) Average mass spectrum per annotation group. 1132 - (annot) Average mass spectrum per annotation group.