comparison msi_preprocessing.xml @ 9:4d5578b57a77 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 5feaf3d0e0da8cef1241fecc1f4d6f81324594e6
author galaxyp
date Wed, 22 Aug 2018 13:43:04 -0400
parents d77c5228fd1a
children df8d7f6f210b
comparison
equal deleted inserted replaced
8:d77c5228fd1a 9:4d5578b57a77
1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.5"> 1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.6">
2 <description> 2 <description>
3 mass spectrometry imaging preprocessing 3 mass spectrometry imaging preprocessing
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
43 msidata <- readImzML('infile', attach.only=TRUE) 43 msidata <- readImzML('infile', attach.only=TRUE)
44 #end if 44 #end if
45 #elif $infile.ext == 'analyze75' 45 #elif $infile.ext == 'analyze75'
46 msidata = readAnalyze('infile', attach.only=TRUE) 46 msidata = readAnalyze('infile', attach.only=TRUE)
47 #else 47 #else
48 load('infile.RData') 48 loadRData <- function(fileName){
49 load(fileName)
50 get(ls()[ls() != "fileName"])
51 }
52 msidata = loadRData('infile.RData')
49 #end if 53 #end if
50 54
51 print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[])))) 55 print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[]))))
52 56
53 ## function to later read RData reference files in 57 ## function to later read RData reference files in
62 ######################### preparations for QC report ################# 66 ######################### preparations for QC report #################
63 67
64 maxfeatures = length(features(msidata)) 68 maxfeatures = length(features(msidata))
65 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) 69 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
66 medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2) 70 medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2)
67 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) 71 minmz = round(min(mz(msidata)), digits=2)
68 QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs)) 72 maxmz = round(max(mz(msidata)), digits=2)
73 QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medianpeaks, medint))
69 vectorofactions = "inputdata" 74 vectorofactions = "inputdata"
70 75
71 ############################### Preprocessing steps ########################### 76 ############################### Preprocessing steps ###########################
72 ############################################################################### 77 ###############################################################################
73 78
84 ############################### QC ########################### 89 ############################### QC ###########################
85 90
86 maxfeatures = length(features(msidata)) 91 maxfeatures = length(features(msidata))
87 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),) 92 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),)
88 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 93 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
89 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) 94 minmz = round(min(mz(msidata)), digits=2)
90 normalized = c(maxfeatures, medianpeaks, medint, TICs) 95 maxmz = round(max(mz(msidata)), digits=2)
96 normalized = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
91 QC_numbers= cbind(QC_numbers, normalized) 97 QC_numbers= cbind(QC_numbers, normalized)
92 vectorofactions = append(vectorofactions, "normalized") 98 vectorofactions = append(vectorofactions, "normalized")
93 99
94 ############################### Baseline reduction ########################### 100 ############################### Baseline reduction ###########################
95 101
102 ############################### QC ########################### 108 ############################### QC ###########################
103 109
104 maxfeatures = length(features(msidata)) 110 maxfeatures = length(features(msidata))
105 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) 111 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
106 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 112 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
107 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) 113 minmz = round(min(mz(msidata)), digits=2)
108 baseline= c(maxfeatures, medianpeaks, medint, TICs) 114 maxmz = round(max(mz(msidata)), digits=2)
115 baseline = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
109 QC_numbers= cbind(QC_numbers, baseline) 116 QC_numbers= cbind(QC_numbers, baseline)
110 vectorofactions = append(vectorofactions, "baseline red.") 117 vectorofactions = append(vectorofactions, "baseline red.")
111 118
112 ############################### Smoothing ########################### 119 ############################### Smoothing ###########################
113 120
134 ############################### QC ########################### 141 ############################### QC ###########################
135 142
136 maxfeatures = length(features(msidata)) 143 maxfeatures = length(features(msidata))
137 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) 144 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
138 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 145 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
139 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) 146 minmz = round(min(mz(msidata)), digits=2)
140 smoothed= c(maxfeatures, medianpeaks, medint, TICs) 147 maxmz = round(max(mz(msidata)), digits=2)
148 smoothed = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
141 QC_numbers= cbind(QC_numbers, smoothed) 149 QC_numbers= cbind(QC_numbers, smoothed)
142 vectorofactions = append(vectorofactions, "smoothed") 150 vectorofactions = append(vectorofactions, "smoothed")
143 151
144 ############################### Peak picking ########################### 152 ############################### Peak picking ###########################
145 153
146 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking': 154 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking':
147 print('Peak_picking') 155 print('Peak_picking')
148 ## Peakpicking 156 ## Peakpicking
149 157
150
151 ## remove duplicated coordinates, otherwise peak picking will fail 158 ## remove duplicated coordinates, otherwise peak picking will fail
152 print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed")) 159 print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed"))
153 msidata <- msidata[,!duplicated(coord(msidata))] 160 msidata <- msidata[,!duplicated(coord(msidata))]
154 161
155 #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': 162 #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
172 ############################### QC ########################### 179 ############################### QC ###########################
173 180
174 maxfeatures = length(features(msidata)) 181 maxfeatures = length(features(msidata))
175 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) 182 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
176 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 183 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
177 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) 184 minmz = round(min(mz(msidata)), digits=2)
178 picked= c(maxfeatures, medianpeaks, medint, TICs) 185 maxmz = round(max(mz(msidata)), digits=2)
186 picked = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
179 QC_numbers= cbind(QC_numbers, picked) 187 QC_numbers= cbind(QC_numbers, picked)
180 vectorofactions = append(vectorofactions, "picked") 188 vectorofactions = append(vectorofactions, "picked")
181 189
182 ############################### Peak alignment ########################### 190 ############################### Peak alignment ###########################
183 191
218 ############################### QC ########################### 226 ############################### QC ###########################
219 227
220 maxfeatures = length(features(msidata)) 228 maxfeatures = length(features(msidata))
221 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) 229 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
222 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 230 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
223 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) 231 minmz = round(min(mz(msidata)), digits=2)
224 aligned= c(maxfeatures, medianpeaks, medint, TICs) 232 maxmz = round(max(mz(msidata)), digits=2)
233 aligned = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
225 QC_numbers= cbind(QC_numbers, aligned) 234 QC_numbers= cbind(QC_numbers, aligned)
226 vectorofactions = append(vectorofactions, "aligned") 235 vectorofactions = append(vectorofactions, "aligned")
227 236
228 ############################### Peak filtering ########################### 237 ############################### Peak filtering ###########################
229 238
235 ############################### QC ########################### 244 ############################### QC ###########################
236 245
237 maxfeatures = length(features(msidata)) 246 maxfeatures = length(features(msidata))
238 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) 247 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
239 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 248 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
240 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) 249 minmz = round(min(mz(msidata)), digits=2)
241 filtered= c(maxfeatures, medianpeaks, medint, TICs) 250 maxmz = round(max(mz(msidata)), digits=2)
251 filtered = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
242 QC_numbers= cbind(QC_numbers, filtered) 252 QC_numbers= cbind(QC_numbers, filtered)
243 vectorofactions = append(vectorofactions, "filtered") 253 vectorofactions = append(vectorofactions, "filtered")
244 254
245 ############################### Data reduction ########################### 255 ############################### Data reduction ###########################
246 256
277 ############################### QC ########################### 287 ############################### QC ###########################
278 288
279 maxfeatures = length(features(msidata)) 289 maxfeatures = length(features(msidata))
280 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) 290 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
281 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 291 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
282 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) 292 minmz = round(min(mz(msidata)), digits=2)
283 reduced= c(maxfeatures, medianpeaks, medint, TICs) 293 maxmz = round(max(mz(msidata)), digits=2)
294 reduced = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
284 QC_numbers= cbind(QC_numbers, reduced) 295 QC_numbers= cbind(QC_numbers, reduced)
285 vectorofactions = append(vectorofactions, "reduced") 296 vectorofactions = append(vectorofactions, "reduced")
286 297
287 ############################### Transformation ########################### 298 ############################### Transformation ###########################
288 299
306 ############################### QC ########################### 317 ############################### QC ###########################
307 318
308 maxfeatures = length(features(msidata)) 319 maxfeatures = length(features(msidata))
309 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) 320 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
310 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 321 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
311 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) 322 minmz = round(min(mz(msidata)), digits=2)
312 transformed= c(maxfeatures, medianpeaks, medint, TICs) 323 maxmz = round(max(mz(msidata)), digits=2)
324 transformed = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
313 QC_numbers= cbind(QC_numbers, transformed) 325 QC_numbers= cbind(QC_numbers, transformed)
314 vectorofactions = append(vectorofactions, "transformed") 326 vectorofactions = append(vectorofactions, "transformed")
315 327
316 #end if 328 #end if
317 #end for 329 #end for
434 ## save QC report 446 ## save QC report
435 447
436 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) 448 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
437 plot(0,type='n',axes=FALSE,ann=FALSE) 449 plot(0,type='n',axes=FALSE,ann=FALSE)
438 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) 450 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))
439 rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC") 451 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\n# peaks", "median\nintensity")
440 grid.table(t(QC_numbers)) 452 grid.table(t(QC_numbers))
441 453
442 #if str($tabular_annotation.load_annotation) == 'yes_annotation': 454 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
443 455
444 ## the more annotation groups a file has the smaller will be the legend 456 ## the more annotation groups a file has the smaller will be the legend
889 901
890 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ 902 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_
891 903
892 This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. 904 This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data.
893 905
894 Input data: 3 types of input data can be used: 906 Input data: 3 types of MSI data can be used:
895 907
896 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ 908 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
897 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) 909 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
898 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) 910 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
911 - Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed before peak picking.
899 - optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column 912 - optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column
900 913
901 Options: 914 Options:
902 915
903 - Normalization: Normalization of intensities to total ion current (TIC) 916 - Normalization: Normalization of intensities to total ion current (TIC)
904 - Baseline reduction: Baseline reduction removes backgroundintensity generated by chemical noise (common in MALDI datasets) 917 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets)
905 - Smoothening: Smoothing of the peaks reduces noise and improves peak detection 918 - Smoothing: Smoothing of the peaks reduces noise and improves peak detection
906 - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards) 919 - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards)
907 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value 920 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value
908 - Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If not sure which cutoff to choose, run qualitycontrol first and decide according to the zero value plot. 921 - Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If not sure which cutoff to choose, run qualitycontrol first and decide according to the zero value plot.
909 - Data reduction: binning, resampling or peak filtering to reduce data 922 - Data reduction: binning, resampling or peak filtering to reduce data
910 - Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems. 923 - Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems.
912 925
913 Output: 926 Output:
914 927
915 - imzML file, preprocessed 928 - imzML file, preprocessed
916 - pdf with key values after each processing step, in case of loaded annotations file overview plot of pixel annotations 929 - pdf with key values after each processing step, in case of loaded annotations file overview plot of pixel annotations
917 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) 930 - optional: intensity matrix as tabular file (m/z in rows and pixel in columns, filled with intensity values)
918 - optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group 931 - optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group
919 932
920 Tip: 933 Tip:
921 934
922 - Peak alignment works only after peak picking 935 - Peak alignment works only after peak picking