diff quality_report.xml @ 4:3b7a35d50ebf draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d
author galaxyp
date Fri, 22 Mar 2019 08:11:43 -0400
parents 16556ca0196b
children f0d1f3e97303
line wrap: on
line diff
--- a/quality_report.xml	Thu Feb 28 09:26:25 2019 -0500
+++ b/quality_report.xml	Fri Mar 22 08:11:43 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.2">
+<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.3">
     <description>
         mass spectrometry imaging QC
     </description>
@@ -36,11 +36,7 @@
 
 @READING_MSIDATA_INRAM@
 
-## create full matrix to make processed imzML files compatible with segmentation and other steps
-iData(msidata) <- iData(msidata)[]
-
 ## remove duplicated coordinates
-print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
 msidata <- msidata[,!duplicated(coord(msidata))]
 
 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample)
@@ -67,20 +63,20 @@
 @DATA_PROPERTIES_INRAM@
 
 ## Median intensities
-medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+medint = round(median(spectra(msidata), na.rm=TRUE), digits=2)
 ## Number of spectra multiplied by number of m/z values (potential number of peaks)
 numpeaks = ncol(msidata)*nrow(msidata)
 ## Percentage of intensities > 0
 percpeaks = round(npeaks/numpeaks*100, digits=2)
 ## Number of empty TICs
-TICs = colSums(spectra(msidata)[], na.rm=TRUE) 
+TICs = colSums(spectra(msidata), na.rm=TRUE) 
 NumemptyTIC = sum(TICs == 0)
 ## Median and sd TIC
 medTIC = round(median(TICs), digits=1)
 sdTIC = round(sd(TICs), digits=0)
 ## Median and sd # peaks per spectrum
-medpeaks = round(median(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0)
-sdpeaks = round(sd(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0)
+medpeaks = round(median(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0)
+sdpeaks = round(sd(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0)
 ## Processing information
 centroidedinfo = centroided(msidata)
 
@@ -247,15 +243,15 @@
 
             filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
 
-            if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){
+            if (nrow(filtered_data) > 1 & sum(spectra(filtered_data),na.rm=TRUE) > 0){
 
                 ## intensity of all m/z > 0
-                intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0
+                intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0
 
-            }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){
+            }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data), na.rm=TRUE) > 0){
 
                 ## intensity of only m/z > 0
-                intensity_sum = spectra(filtered_data)[] > 0 
+                intensity_sum = spectra(filtered_data) > 0 
 
             }else{
 
@@ -364,7 +360,7 @@
     #################### 5) Number of peaks per pixel - image ##################
 
     ## here every intensity value > 0 counts as peak
-    peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE)
+    peaksperpixel = colSums(spectra(msidata)> 0, na.rm=TRUE)
     peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel)
 
     print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel))+
@@ -400,7 +396,7 @@
 
     ############################### 6b) median int image ###############################
 
-    median_int = apply(spectra(msidata)[],2,median) 
+    median_int = apply(spectra(msidata),2,median) 
     median_coordarray=cbind(coord(msidata)[,1:2], median_int)
 
     print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int))+
@@ -418,7 +414,7 @@
 
     ############################### 6c) max int image ###############################
 
-    max_int = apply(spectra(msidata)[],2,max) 
+    max_int = apply(spectra(msidata),2,max) 
     max_coordarray=cbind(coord(msidata)[,1:2], max_int)
 
     print(ggplot(max_coordarray, aes(x=x, y=y, fill=max_int))+
@@ -437,7 +433,7 @@
     ############################### 7) Most abundant m/z image #################
 
     ## for each spectrum find the row (m/z) with the highest intensity
-    highestmz = apply(spectra(msidata)[],2,which.max)
+    highestmz = apply(spectra(msidata),2,which.max)
     ## if which.max returns integer(0) for some spectra, highestmz is a list; the integer(0) entries have to be replaced with NA before unlisting
     if (class(highestmz) == "list"){
         ##find zero-length values
@@ -477,7 +473,7 @@
         image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1))
 
         for (PCs in 1:2){
-            print(image(pca, column = c(paste0("PC",PCs)) , superpose = FALSE, col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))}
+            print(image(pca, column = c(paste0("PC",PCs)) , strip=FALSE, superpose = FALSE, main=paste0("PC", PCs), col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))}
 
     ## remove pca to clean up RAM space
         rm(pca)
@@ -575,7 +571,7 @@
 
     ########################## 12) Number of peaks per m/z #####################
 
-    peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE)
+    peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE)
 
     par(mfrow = c(2,1), mar=c(5,6,4,4.5))
     ## 12a) scatterplot
@@ -594,7 +590,7 @@
     ########################## 13) Sum of intensities per m/z ##################
 
     ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel)
-    mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z
+    mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z
 
     par(mfrow = c(2,1), mar=c(5,6,4,2))
     ## 13a) scatterplot
@@ -625,7 +621,7 @@
         abline(v=abline_vector, lty = 3)}
 
     ## 14b) histogram: 
-    hist(spectra(msidata)[], main="", xlab = "", ylab="", las=1)
+    hist(spectra(msidata), main="", xlab = "", ylab="", las=1)
     title(main="Intensity histogram", line=2)
     title(xlab="intensities")
     title(ylab="Frequency", line=4)
@@ -664,7 +660,7 @@
             mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE)
             mean_matrix = cbind(mean_matrix, mean_mz_sample)}
 
-        boxplot(mean_matrix, ylab = "Mean intensity per m/z", main="Mean m/z intensities per annotation group", xaxt = "n")
+        boxplot(log10(mean_matrix), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n")
         (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2))
 
         ## 14e) Heatmap of pearson correlation on mean intensities between annotation groups
@@ -692,7 +688,7 @@
 
     ## replace any NA with 0, otherwise plot function will not work at all
     msidata_no_NA = msidata
-    spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA)[])] = 0
+    spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA))] = 0
 
     ## find three equal m/z ranges for the average mass spectra plots: 
     third_mz_range = nrow(msidata_no_NA)/3
@@ -703,6 +699,17 @@
     plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
     plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
 
+    ## plot one average mass spectrum for each pixel annotation group
+
+    if (!is.null(levels(msidata\$annotation))){
+        ## print legend only for fewer than 10 annotation groups
+        if (length(levels(msidata\$annotation)) < 10){
+            key_legend = TRUE
+        }else{key_legend = FALSE}
+        par(mfrow = c(1,1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
+        plot(msidata, pixel=1:ncol(msidata), pixel.groups=msidata\$annotation, key=key_legend, col=hue_pal()(length(levels(msidata\$annotation))),superpose=TRUE, main="Average mass spectra for annotation groups")
+    }
+
     ## plot 4 random mass spectra
     ## find four random pixels to plot their spectra in the following plots:
     pixel1 = sample(pixelnumber,1)
@@ -1090,12 +1097,14 @@
 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups.
 - Histogram of intensities. 
 - (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group. 
-- (annot) Mean intensities per m/z and annotation group: For all pixels of an annotation group the mean intensity for each m/z is calculated and shown as boxplot. 
+- (annot) Log10 mean intensities per m/z and annotation group: For all pixels of an annotation group the mean intensity for each m/z is calculated, log10-transformed and shown as boxplot. 
 - (annot) Pearson correlation between annotation groups (needs at least 2 groups) based on mean intensities and shown as heatmap.
 
 **Mass spectra and m/z accuracy**
 
 - Average mass spectra: First plot shows the average spectrum over the full m/z range, the other three plots zoom into the m/z axis.
+- (annot) Average mass spectrum per annotation group.
+- Random mass spectra: The mass spectra for four random pixels are plotted.
 - (cal) For each calibrant, four zoomed average mass spectra are drawn with different zoom levels. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. In the second spectrum each blue dot indicates one data point.
 - (annot) Average spectrum per annotation group: For each calibrant a zoomed-in mass spectrum is plotted, this time with the average intensities for each annotation group separately. 
 - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z is plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed-in mass spectra.