Mercurial > repos > galaxyp > cardinal_quality_report

diff quality_report.xml @ 4:3b7a35d50ebf draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d
author: galaxyp
date: Fri, 22 Mar 2019 08:11:43 -0400
parents: 16556ca0196b
children: f0d1f3e97303
--- a/quality_report.xml	Thu Feb 28 09:26:25 2019 -0500
+++ b/quality_report.xml	Fri Mar 22 08:11:43 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.2">
+<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.3">
     <description>
         mass spectrometry imaging QC
     </description>
@@ -36,11 +36,7 @@
 
 @READING_MSIDATA_INRAM@
 
-## create full matrix to make processed imzML files compatible with segmentation and other steps
-iData(msidata) <- iData(msidata)[]
-
 ## remove duplicated coordinates
-print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
 msidata <- msidata[,!duplicated(coord(msidata))]
 
 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample)
@@ -67,20 +63,20 @@
 @DATA_PROPERTIES_INRAM@
 
 ## Median intensities
-medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+medint = round(median(spectra(msidata), na.rm=TRUE), digits=2)
 ## Spectra multiplied with m/z (potential number of peaks)
 numpeaks = ncol(msidata)*nrow(msidata)
 ## Percentage of intensities > 0
 percpeaks = round(npeaks/numpeaks*100, digits=2)
 ## Number of empty TICs
-TICs = colSums(spectra(msidata)[], na.rm=TRUE) 
+TICs = colSums(spectra(msidata), na.rm=TRUE) 
 NumemptyTIC = sum(TICs == 0)
 ## Median und sd TIC
 medTIC = round(median(TICs), digits=1)
 sdTIC = round(sd(TICs), digits=0)
 ## Median and sd # peaks per spectrum
-medpeaks = round(median(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0)
-sdpeaks = round(sd(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0)
+medpeaks = round(median(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0)
+sdpeaks = round(sd(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0)
 ## Processing informations
 centroidedinfo = centroided(msidata)
 
@@ -247,15 +243,15 @@
 
             filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
 
-            if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){
+            if (nrow(filtered_data) > 1 & sum(spectra(filtered_data),na.rm=TRUE) > 0){
 
                 ## intensity of all m/z > 0
-                intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0
+                intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0
 
-            }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){
+            }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data), na.rm=TRUE) > 0){
 
                 ## intensity of only m/z > 0
-                intensity_sum = spectra(filtered_data)[] > 0 
+                intensity_sum = spectra(filtered_data) > 0 
 
             }else{
 
@@ -364,7 +360,7 @@
     #################### 5) Number of peaks per pixel - image ##################
 
     ## here every intensity value > 0 counts as peak
-    peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE)
+    peaksperpixel = colSums(spectra(msidata)> 0, na.rm=TRUE)
     peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel)
 
     print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel))+
@@ -400,7 +396,7 @@
 
     ############################### 6b) median int image ###############################
 
-    median_int = apply(spectra(msidata)[],2,median) 
+    median_int = apply(spectra(msidata),2,median) 
     median_coordarray=cbind(coord(msidata)[,1:2], median_int)
 
     print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int))+
@@ -418,7 +414,7 @@
 
     ############################### 6c) max int image ###############################
 
-    max_int = apply(spectra(msidata)[],2,max) 
+    max_int = apply(spectra(msidata),2,max) 
     max_coordarray=cbind(coord(msidata)[,1:2], max_int)
 
     print(ggplot(max_coordarray, aes(x=x, y=y, fill=max_int))+
@@ -437,7 +433,7 @@
     ############################### 7) Most abundant m/z image #################
 
     ## for each spectrum find the row (m/z) with the highest intensity
-    highestmz = apply(spectra(msidata)[],2,which.max)
+    highestmz = apply(spectra(msidata),2,which.max)
     ## in case for some spectra max returns integer(0), highestmz is a list and integer(0) have to be replaced with NA and unlisted
     if (class(highestmz) == "list"){
         ##find zero-length values
@@ -477,7 +473,7 @@
         image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1))
 
         for (PCs in 1:2){
-            print(image(pca, column = c(paste0("PC",PCs)) , superpose = FALSE, col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))}
+            print(image(pca, column = c(paste0("PC",PCs)) , strip=FALSE, superpose = FALSE, main=paste0("PC", PCs), col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))}
 
     ## remove pca to clean up RAM space
         rm(pca)
@@ -575,7 +571,7 @@
 
     ########################## 12) Number of peaks per m/z #####################
 
-    peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE)
+    peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE)
 
     par(mfrow = c(2,1), mar=c(5,6,4,4.5))
     ## 12a) scatterplot
@@ -594,7 +590,7 @@
     ########################## 13) Sum of intensities per m/z ##################
 
     ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel)
-    mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z
+    mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z
 
     par(mfrow = c(2,1), mar=c(5,6,4,2))
     ## 13a) scatterplot
@@ -625,7 +621,7 @@
         abline(v=abline_vector, lty = 3)}
 
     ## 14b) histogram: 
-    hist(spectra(msidata)[], main="", xlab = "", ylab="", las=1)
+    hist(spectra(msidata), main="", xlab = "", ylab="", las=1)
     title(main="Intensity histogram", line=2)
     title(xlab="intensities")
     title(ylab="Frequency", line=4)
@@ -664,7 +660,7 @@
             mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE)
             mean_matrix = cbind(mean_matrix, mean_mz_sample)}
 
-        boxplot(mean_matrix, ylab = "Mean intensity per m/z", main="Mean m/z intensities per annotation group", xaxt = "n")
+        boxplot(log10(mean_matrix), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n")
         (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2))
 
         ## 14e) Heatmap of pearson correlation on mean intensities between annotation groups
@@ -692,7 +688,7 @@
 
     ## replace any NA with 0, otherwise plot function will not work at all
     msidata_no_NA = msidata
-    spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA)[])] = 0
+    spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA))] = 0
 
     ## find three equal m/z ranges for the average mass spectra plots: 
     third_mz_range = nrow(msidata_no_NA)/3
@@ -703,6 +699,17 @@
     plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
     plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
 
+    ## plot one average mass spectrum for each pixel annotation group
+
+    if (!is.null(levels(msidata\$annotation))){
+        ## print the legend only when there are fewer than 10 annotation groups
+        if (length(levels(msidata\$annotation)) < 10){
+            key_legend = TRUE
+        }else{key_legend = FALSE}
+        par(mfrow = c(1,1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
+        plot(msidata, pixel=1:ncol(msidata), pixel.groups=msidata\$annotation, key=key_legend, col=hue_pal()(length(levels(msidata\$annotation))),superpose=TRUE, main="Average mass spectra for annotation groups")
+    }
+
     ## plot 4 random mass spectra
     ## find four random pixel to plot their spectra in the following plots:
     pixel1 = sample(pixelnumber,1)
@@ -1090,12 +1097,14 @@
 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups.
 - Histogram of intensities. 
 - (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group. 
-- (annot) Mean intensities per m/z and annotation group: For all pixels of an annotation group the mean intensity for each m/z is calculated and shown as boxplot. 
+- (annot) Log10 mean intensities per m/z and annotation group: For all pixels of an annotation group the mean intensity for each m/z is calculated, log10-transformed and shown as a boxplot. 
 - (annot) Pearson correlation between annotation groups (needs at least 2 groups) based on mean intensities and shown as heatmap.
 
 **Mass spectra and m/z accuracy**
 
 - Average mass spectra: First plot shows the average spectrum over the full m/z range, the other three plots zoom into the m/z axis.
+- (annot) Average mass spectrum per annotation group.
+- Random mass spectra: The mass spectra of four randomly chosen pixels are plotted.
 - (cal) For each calibrant four zoomed average mass spectrum are drawn with different zooming level. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. In the second spectrum each blue dot indicates one data point.
 - (annot) Average spectrum per annotation group: For each calibrant a zoomed in mass spectrum is plotted this time with the average intensities for each annotation group separately. 
 - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed in mass spectra.
author	galaxyp
date	Fri, 22 Mar 2019 08:11:43 -0400
parents	16556ca0196b
children	f0d1f3e97303