# HG changeset patch # User galaxyp # Date 1534959784 14400 # Node ID 4d5578b57a7716fe33c3fbcffa3bacc7100924a9 # Parent d77c5228fd1a1ad4de9258af7417f7cce32535ba planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 5feaf3d0e0da8cef1241fecc1f4d6f81324594e6 diff -r d77c5228fd1a -r 4d5578b57a77 msi_preprocessing.xml --- a/msi_preprocessing.xml Tue Jul 24 04:53:10 2018 -0400 +++ b/msi_preprocessing.xml Wed Aug 22 13:43:04 2018 -0400 @@ -1,4 +1,4 @@ - + mass spectrometry imaging preprocessing @@ -45,7 +45,11 @@ #elif $infile.ext == 'analyze75' msidata = readAnalyze('infile', attach.only=TRUE) #else - load('infile.RData') + loadRData <- function(fileName){ + load(fileName) + get(ls()[ls() != "fileName"]) + } + msidata = loadRData('infile.RData') #end if print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[])))) @@ -64,8 +68,9 @@ maxfeatures = length(features(msidata)) medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2) - TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) - QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs)) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medianpeaks, medint)) vectorofactions = "inputdata" ############################### Preprocessing steps ########################### @@ -86,8 +91,9 @@ maxfeatures = length(features(msidata)) medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),) medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) - normalized = c(maxfeatures, medianpeaks, medint, TICs) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + normalized = c(minmz, maxmz,maxfeatures, medianpeaks, medint) 
QC_numbers= cbind(QC_numbers, normalized) vectorofactions = append(vectorofactions, "normalized") @@ -104,8 +110,9 @@ maxfeatures = length(features(msidata)) medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) - baseline= c(maxfeatures, medianpeaks, medint, TICs) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + baseline = c(minmz, maxmz,maxfeatures, medianpeaks, medint) QC_numbers= cbind(QC_numbers, baseline) vectorofactions = append(vectorofactions, "baseline red.") @@ -136,8 +143,9 @@ maxfeatures = length(features(msidata)) medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) - smoothed= c(maxfeatures, medianpeaks, medint, TICs) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + smoothed = c(minmz, maxmz,maxfeatures, medianpeaks, medint) QC_numbers= cbind(QC_numbers, smoothed) vectorofactions = append(vectorofactions, "smoothed") @@ -147,7 +155,6 @@ print('Peak_picking') ## Peakpicking - ## remove duplicated coordinates, otherwise peak picking will fail print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed")) msidata <- msidata[,!duplicated(coord(msidata))] @@ -174,8 +181,9 @@ maxfeatures = length(features(msidata)) medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) - picked= c(maxfeatures, medianpeaks, medint, TICs) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + picked = c(minmz, maxmz,maxfeatures, medianpeaks, medint) QC_numbers= cbind(QC_numbers, picked) vectorofactions = 
append(vectorofactions, "picked") @@ -220,8 +228,9 @@ maxfeatures = length(features(msidata)) medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) - aligned= c(maxfeatures, medianpeaks, medint, TICs) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + aligned = c(minmz, maxmz,maxfeatures, medianpeaks, medint) QC_numbers= cbind(QC_numbers, aligned) vectorofactions = append(vectorofactions, "aligned") @@ -237,8 +246,9 @@ maxfeatures = length(features(msidata)) medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) - filtered= c(maxfeatures, medianpeaks, medint, TICs) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + filtered = c(minmz, maxmz,maxfeatures, medianpeaks, medint) QC_numbers= cbind(QC_numbers, filtered) vectorofactions = append(vectorofactions, "filtered") @@ -279,8 +289,9 @@ maxfeatures = length(features(msidata)) medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) - reduced= c(maxfeatures, medianpeaks, medint, TICs) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + reduced = c(minmz, maxmz,maxfeatures, medianpeaks, medint) QC_numbers= cbind(QC_numbers, reduced) vectorofactions = append(vectorofactions, "reduced") @@ -308,8 +319,9 @@ maxfeatures = length(features(msidata)) medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) - transformed= c(maxfeatures, 
medianpeaks, medint, TICs) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + transformed = c(minmz, maxmz,maxfeatures, medianpeaks, medint) QC_numbers= cbind(QC_numbers, transformed) vectorofactions = append(vectorofactions, "transformed") @@ -436,7 +448,7 @@ pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) plot(0,type='n',axes=FALSE,ann=FALSE) title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) - rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC") + rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\n# peaks", "median\nintensity") grid.table(t(QC_numbers)) #if str($tabular_annotation.load_annotation) == 'yes_annotation': @@ -891,18 +903,19 @@ This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. -Input data: 3 types of input data can be used: +Input data: 3 types of MSI data can be used: - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format `_ - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) +- Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed before peak picking. 
- optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column Options: - Normalization: Normalization of intensities to total ion current (TIC) -- Baseline reduction: Baseline reduction removes backgroundintensity generated by chemical noise (common in MALDI datasets) -- Smoothening: Smoothing of the peaks reduces noise and improves peak detection +- Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets) +- Smoothing: Smoothing of the peaks reduces noise and improves peak detection - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards) - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value - Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If not sure which cutoff to choose, run qualitycontrol first and decide according to the zero value plot. 
@@ -914,7 +927,7 @@ - imzML file, preprocessed - pdf with key values after each processing step, in case of loaded annotations file overview plot of pixel annotations -- optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) +- optional: intensity matrix as tabular file (m/z in rows and pixel in columns, filled with intensity values) - optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group Tip: diff -r d77c5228fd1a -r 4d5578b57a77 test-data/preprocessing_results1.RData Binary file test-data/preprocessing_results1.RData has changed diff -r d77c5228fd1a -r 4d5578b57a77 test-data/preprocessing_results1.pdf Binary file test-data/preprocessing_results1.pdf has changed diff -r d77c5228fd1a -r 4d5578b57a77 test-data/preprocessing_results2.pdf Binary file test-data/preprocessing_results2.pdf has changed diff -r d77c5228fd1a -r 4d5578b57a77 test-data/preprocessing_results3.RData Binary file test-data/preprocessing_results3.RData has changed diff -r d77c5228fd1a -r 4d5578b57a77 test-data/preprocessing_results3.pdf Binary file test-data/preprocessing_results3.pdf has changed diff -r d77c5228fd1a -r 4d5578b57a77 test-data/preprocessing_results4.RData Binary file test-data/preprocessing_results4.RData has changed diff -r d77c5228fd1a -r 4d5578b57a77 test-data/preprocessing_results4.pdf Binary file test-data/preprocessing_results4.pdf has changed diff -r d77c5228fd1a -r 4d5578b57a77 test-data/preprocessing_results5.RData Binary file test-data/preprocessing_results5.RData has changed diff -r d77c5228fd1a -r 4d5578b57a77 test-data/preprocessing_results5.pdf Binary file test-data/preprocessing_results5.pdf has changed