Mercurial > repos > galaxyp > cardinal_quality_report
diff quality_report.xml @ 4:3b7a35d50ebf draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d
author | galaxyp |
---|---|
date | Fri, 22 Mar 2019 08:11:43 -0400 |
parents | 16556ca0196b |
children | f0d1f3e97303 |
line wrap: on
line diff
--- a/quality_report.xml Thu Feb 28 09:26:25 2019 -0500 +++ b/quality_report.xml Fri Mar 22 08:11:43 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.2"> +<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.3"> <description> mass spectrometry imaging QC </description> @@ -36,11 +36,7 @@ @READING_MSIDATA_INRAM@ -## create full matrix to make processed imzML files compatible with segmentation and other steps -iData(msidata) <- iData(msidata)[] - ## remove duplicated coordinates -print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed")) msidata <- msidata[,!duplicated(coord(msidata))] ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample) @@ -67,20 +63,20 @@ @DATA_PROPERTIES_INRAM@ ## Median intensities -medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) +medint = round(median(spectra(msidata), na.rm=TRUE), digits=2) ## Spectra multiplied with m/z (potential number of peaks) numpeaks = ncol(msidata)*nrow(msidata) ## Percentage of intensities > 0 percpeaks = round(npeaks/numpeaks*100, digits=2) ## Number of empty TICs -TICs = colSums(spectra(msidata)[], na.rm=TRUE) +TICs = colSums(spectra(msidata), na.rm=TRUE) NumemptyTIC = sum(TICs == 0) ## Median und sd TIC medTIC = round(median(TICs), digits=1) sdTIC = round(sd(TICs), digits=0) ## Median and sd # peaks per spectrum -medpeaks = round(median(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0) -sdpeaks = round(sd(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0) +medpeaks = round(median(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0) +sdpeaks = round(sd(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0) ## Processing informations centroidedinfo = centroided(msidata) @@ -247,15 +243,15 @@ filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] - if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ + if (nrow(filtered_data) > 1 & sum(spectra(filtered_data),na.rm=TRUE) > 0){ ## intensity of all m/z > 0 - intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0 + intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0 - }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ + }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data), na.rm=TRUE) > 0){ ## intensity of only m/z > 0 - intensity_sum = spectra(filtered_data)[] > 0 + intensity_sum = spectra(filtered_data) > 0 }else{ @@ -364,7 +360,7 @@ #################### 5) Number of peaks per pixel - image ################## ## here every intensity value > 0 counts as peak - peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE) + peaksperpixel = colSums(spectra(msidata)> 0, na.rm=TRUE) peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel) print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel))+ @@ -400,7 +396,7 @@ ############################### 6b) median int image ############################### - median_int = apply(spectra(msidata)[],2,median) + median_int = apply(spectra(msidata),2,median) median_coordarray=cbind(coord(msidata)[,1:2], median_int) print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int))+ @@ -418,7 +414,7 @@ ############################### 6c) max int image ############################### - max_int = apply(spectra(msidata)[],2,max) + max_int = apply(spectra(msidata),2,max) max_coordarray=cbind(coord(msidata)[,1:2], max_int) print(ggplot(max_coordarray, aes(x=x, y=y, fill=max_int))+ @@ -437,7 +433,7 @@ ############################### 7) Most abundant m/z image ################# ## for each spectrum find the row (m/z) with the highest intensity - highestmz = apply(spectra(msidata)[],2,which.max) + highestmz = apply(spectra(msidata),2,which.max) ## in case for some spectra max returns integer(0), highestmz is a list and integer(0) have to be replaced with NA and unlisted if (class(highestmz) == "list"){ ##find zero-length values @@ -477,7 +473,7 @@ image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1)) for (PCs in 1:2){ - print(image(pca, column = c(paste0("PC",PCs)) , superpose = FALSE, col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))} + print(image(pca, column = c(paste0("PC",PCs)) , strip=FALSE, superpose = FALSE, main=paste0("PC", PCs), col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))} ## remove pca to clean up RAM space rm(pca) @@ -575,7 +571,7 @@ ########################## 12) Number of peaks per m/z ##################### - peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE) + peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE) par(mfrow = c(2,1), mar=c(5,6,4,4.5)) ## 12a) scatterplot @@ -594,7 +590,7 @@ ########################## 13) Sum of intensities per m/z ################## ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel) - mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z + mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z par(mfrow = c(2,1), mar=c(5,6,4,2)) ## 13a) scatterplot @@ -625,7 +621,7 @@ abline(v=abline_vector, lty = 3)} ## 14b) histogram: - hist(spectra(msidata)[], main="", xlab = "", ylab="", las=1) + hist(spectra(msidata), main="", xlab = "", ylab="", las=1) title(main="Intensity histogram", line=2) title(xlab="intensities") title(ylab="Frequency", line=4) @@ -664,7 +660,7 @@ mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE) mean_matrix = cbind(mean_matrix, mean_mz_sample)} - boxplot(mean_matrix, ylab = "Mean intensity per m/z", main="Mean m/z intensities per annotation group", xaxt = "n") + boxplot(log10(mean_matrix), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n") (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2)) ## 14e) Heatmap of pearson correlation on mean intensities between annotation groups @@ -692,7 +688,7 @@ ## replace any NA with 0, otherwise plot function will not work at all msidata_no_NA = msidata - spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA)[])] = 0 + spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA))] = 0 ## find three equal m/z ranges for the average mass spectra plots: third_mz_range = nrow(msidata_no_NA)/3 @@ -703,6 +699,17 @@ plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum") plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum") + ## plot one average mass spectrum for each pixel annotation group + + if (!is.null(levels(msidata\$annotation))){ + ## print legend only for less than 10 samples + if (length(levels(msidata\$annotation)) < 10){ + key_legend = TRUE + }else{key_legend = FALSE} + par(mfrow = c(1,1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) + plot(msidata, pixel=1:ncol(msidata), pixel.groups=msidata\$annotation, key=key_legend, col=hue_pal()(length(levels(msidata\$annotation))),superpose=TRUE, main="Average mass spectra for annotation groups") + } + ## plot 4 random mass spectra ## find four random pixel to plot their spectra in the following plots: pixel1 = sample(pixelnumber,1) @@ -1090,12 +1097,14 @@ - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups. - Histogram of intensities. - (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group. -- (annot) Mean intensities per m/z and annotation group: For all pixels of an annotation group the mean intensity for each m/z is calculated and shown as boxplot. +- (annot) Log10 mean intensities per m/z and annotation group: For all pixels of an annotation group the log10 mean intensity for each m/z is calculated and shown as boxplot. - (annot) Pearson correlation between annotation groups (needs at least 2 groups) based on mean intensities and shown as heatmap. **Mass spectra and m/z accuracy** - Average mass spectra: First plot shows the average spectrum over the full m/z range, the other three plots zoom into the m/z axis. +- (annot) Average mass spectrum per annotation group. +- Random mass spectra: The mass spectra for four random pixel are plotted. - (cal) For each calibrant four zoomed average mass spectrum are drawn with different zooming level. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. In the second spectrum each blue dot indicates one data point. - (annot) Average spectrum per annotation group: For each calibrant a zoomed in mass spectrum is plotted this time with the average intensities for each annotation group separately. - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed in mass spectra.