comparison msi_qualitycontrol.xml @ 2:1ccbda92b76b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_qualitycontrol commit a8eebad4ad469908f64c25e1e2c705eb637e3cae
author galaxyp
date Fri, 24 Nov 2017 18:08:38 -0500
parents c6bc77c4731d
children f6aa0cff777c
comparison
equal deleted inserted replaced
1:c6bc77c4731d 2:1ccbda92b76b
1 <tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.7.0"> 1 <tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.7.0.1">
2 <description> 2 <description>
3 mass spectrometry imaging QC 3 mass spectrometry imaging QC
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement> 6 <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
7 <requirement type="package" version="2.2.1">r-ggplot2</requirement> 7 <requirement type="package" version="2.2.1">r-ggplot2</requirement>
8 <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> 8 <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
9 <requirement type="package" version="2.2.1"> r-gridextra</requirement> 9 <requirement type="package" version="2.2.1">r-gridextra</requirement>
10 <requirement type="package" version="2.23_15">r-kernsmooth</requirement> 10 <requirement type="package" version="2.23_15">r-kernsmooth</requirement>
11 </requirements> 11 </requirements>
12 <command detect_errors="exit_code"> 12 <command detect_errors="exit_code">
13 <![CDATA[ 13 <![CDATA[
14
15 #if $infile.ext == 'imzml' 14 #if $infile.ext == 'imzml'
16 cp '${infile.extra_files_path}/imzml' infile.imzML && 15 cp '${infile.extra_files_path}/imzml' infile.imzML &&
17 cp '${infile.extra_files_path}/ibd' infile.ibd && 16 cp '${infile.extra_files_path}/ibd' infile.ibd &&
18 #elif $infile.ext == 'analyze75' 17 #elif $infile.ext == 'analyze75'
19 cp '${infile.extra_files_path}/hdr' infile.hdr && 18 cp '${infile.extra_files_path}/hdr' infile.hdr &&
27 ]]> 26 ]]>
28 </command> 27 </command>
29 <configfiles> 28 <configfiles>
30 <configfile name="cardinal_qualitycontrol_script"><![CDATA[ 29 <configfile name="cardinal_qualitycontrol_script"><![CDATA[
31 30
32 ################################# load libraries and read file #########################
33 library(Cardinal) 31 library(Cardinal)
34 library(ggplot2) 32 library(ggplot2)
35 library(RColorBrewer) 33 library(RColorBrewer)
36 library(gridExtra) 34 library(gridExtra)
37 library(KernSmooth) 35 library(KernSmooth)
46 #else 44 #else
47 load('infile.RData') 45 load('infile.RData')
48 #end if 46 #end if
49 47
50 #if $inputpeptidefile: 48 #if $inputpeptidefile:
51 ## Read tabular file with peptide masses for plots and heatmap images: 49 ### Read tabular file with peptide masses for plots and heatmap images:
52 input_list = read.delim("$inputpeptidefile", header = FALSE, na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE) 50 input_list = read.delim("$inputpeptidefile", header = FALSE, na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE)
51 if (ncol(input_list) == 1)
52 {
53 input_list = cbind(input_list, input_list)
54 }
53 #else 55 #else
54 input_list = data.frame(0, 0) 56 input_list = data.frame(0, 0)
55 #end if 57 #end if
58 colnames(input_list)[1:2] = c("mz", "name")
59
60 #if $inputcalibrants:
61 ### Read tabular file with calibrant masses:
62 calibrant_list = read.delim("$inputcalibrants", header = FALSE, na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE)
63 if (ncol(calibrant_list) == 1)
64 {
65 calibrant_list = cbind(calibrant_list, calibrant_list)
66 }
67 #else
68 calibrant_list = data.frame(0,0)
69 #end if
70
71 colnames(calibrant_list)[1:2] = c("mz", "name")
72
56 73
57 ###################################### file properties in numbers ###################### 74 ###################################### file properties in numbers ######################
58 75
59 ## Number of features (mz) 76 ## Number of features (mz)
60 maxfeatures = length(features(msidata)) 77 maxfeatures = length(features(msidata))
71 maximumy = max(coord(msidata)[,2]) 88 maximumy = max(coord(msidata)[,2])
72 ## Range of intensities 89 ## Range of intensities
73 minint = round(min(spectra(msidata)[]), digits=2) 90 minint = round(min(spectra(msidata)[]), digits=2)
74 maxint = round(max(spectra(msidata)[]), digits=2) 91 maxint = round(max(spectra(msidata)[]), digits=2)
75 medint = round(median(spectra(msidata)[]), digits=2) 92 medint = round(median(spectra(msidata)[]), digits=2)
76 ## Number of intensities > 0 93 ## Number of intensities > 0
77 npeaks= sum(spectra(msidata)[]>0) 94 npeaks= sum(spectra(msidata)[]>0)
78 ## Spectra multiplied with mz (potential number of peaks) 95 ## Spectra multiplied with mz (potential number of peaks)
79 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) 96 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
80 ## Percentage of intensities > 0 97 ## Percentage of intensities > 0
81 percpeaks = round(npeaks/numpeaks*100, digits=2) 98 percpeaks = round(npeaks/numpeaks*100, digits=2)
112 peakpickinginfo='FALSE' 129 peakpickinginfo='FALSE'
113 } else { 130 } else {
114 peakpickinginfo=processinginfo@peakPicking 131 peakpickinginfo=processinginfo@peakPicking
115 } 132 }
116 133
117 134 ### calculate how many input peptide masses are valid:
118 ## calculate how many input peptide masses are valid:
119 inputpeptides = input_list[input_list[,1]>minmz & input_list[,1]<maxmz,] 135 inputpeptides = input_list[input_list[,1]>minmz & input_list[,1]<maxmz,]
120 inputmasses = inputpeptides[,1] 136
121 inputnames = inputpeptides[,2] 137 ### calculate how many input calibrant masses are valid:
122 138 inputcalibrants = calibrant_list[calibrant_list[,1]>minmz & calibrant_list[,1]<maxmz,]
123 ############################################################################# 139
140 ### bind inputcalibrants and inputpeptides together, to make heatmap on both lists
141
142 inputs_all = rbind(inputcalibrants[,1:2], inputpeptides[,1:2])
143 inputmasses = inputs_all[,1]
144 inputnames = inputs_all[,2]
145
124 146
125 properties = c("Number of mz features", 147 properties = c("Number of mz features",
126 "Range of mz values [Da]", 148 "Range of mz values [Da]",
127 "Number of pixels", 149 "Number of pixels",
128 "Range of x coordinates", 150 "Range of x coordinates",
135 "Normalization", 157 "Normalization",
136 "Smoothing", 158 "Smoothing",
137 "Baseline reduction", 159 "Baseline reduction",
138 "Peak picking", 160 "Peak picking",
139 "Centroided", 161 "Centroided",
140 "# valid peptidemasses") 162 "# valid input masses")
141 163
142 values = c(paste0(maxfeatures), 164 values = c(paste0(maxfeatures),
143 paste0(minmz, " - ", maxmz), 165 paste0(minmz, " - ", maxmz),
144 paste0(pixelcount), 166 paste0(pixelcount),
145 paste0(minimumx, " - ", maximumx), 167 paste0(minimumx, " - ", maximumx),
157 paste0(length(inputmasses))) 179 paste0(length(inputmasses)))
158 180
159 181
160 property_df = data.frame(properties, values) 182 property_df = data.frame(properties, values)
161 183
162
163 ## Variables for plots
164 xrange = 1
165 yrange = 1
166 maxx = max(coord(msidata)[,1])+xrange
167 minx = min(coord(msidata)[,1])-xrange
168 maxy = max(coord(msidata)[,2])+yrange
169 miny = min(coord(msidata)[,2])-yrange
170
171
172 ####################################### Preparation of images #########################
173
174 ## Acquisitionorder
175
176 pixelnumber = 1:pixelcount
177 pixelxyarray=cbind(coord(msidata),pixelnumber)
178
179
180 ## Number of peaks per pixel
181 peaksperpixel = colSums(spectra(msidata)[]> 0)
182 peakscoordarray=cbind(coord(msidata), peaksperpixel)
183
184 ## Most abundant mz
185
186 highestmz = apply(spectra(msidata)[],2,which.max)
187 highestmz_matrix = cbind(coord(msidata),mz(msidata)[highestmz])
188 colnames(highestmz_matrix)[3] = "highestmzinDa"
189
190 ###################################### Preparation of plots ############################
191
192 ## function without xaxt for plots with automatic x axis
193 plot_colorByDensity = function(x1,x2,
194 ylim=c(min(x2),max(x2)),
195 xlim=c(min(x1),max(x1)),
196 xlab="",ylab="",main="") {
197
198 df <- data.frame(x1,x2)
199 x <- densCols(x1,x2, colramp=colorRampPalette(c("black", "white")))
200 df\$dens <- col2rgb(x)[1,] + 1L
201 cols <- colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256)
202 df\$col <- cols[df\$dens]
203 plot(x2~x1, data=df[order(df\$dens),],
204 ylim=ylim,xlim=xlim,pch=20,col=col,
205 cex=1,xlab=xlab,ylab=ylab,las=1,
206 main=main)
207 }
208
209 ## Number of peaks per mz - number across all pixel
210 peakspermz = rowSums(spectra(msidata)[] > 0 )
211
212 ## Sum of all intensities for each mz (like TIC, but for mz instead of pixel)
213 mzTIC = rowSums(spectra(msidata)[]) # calculate intensity sum for each mz
214
215
216
217 ######################################## PDF ############################################# 184 ######################################## PDF #############################################
218 ########################################################################################## 185 ##########################################################################################
219 ########################################################################################## 186 ##########################################################################################
220
221 187
222 pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12) 188 pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12)
223 plot(0,type='n',axes=FALSE,ann=FALSE) 189 plot(0,type='n',axes=FALSE,ann=FALSE)
224 #if not $filename: 190 #if not $filename:
225 #set $filename = $infile.display_name 191 #set $filename = $infile.display_name
228 194
229 ############################# I) numbers #################################### 195 ############################# I) numbers ####################################
230 ############################################################################# 196 #############################################################################
231 grid.table(property_df, rows= NULL) 197 grid.table(property_df, rows= NULL)
232 198
233 ############################# II) ion images ################################# 199 if (npeaks > 0)
234 ############################################################################## 200 {
235 201 ############################# II) ion images #################################
236 ## 1) Acquisition image 202 ##############################################################################
237 (ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber)) 203
238 +scale_y_reverse() + geom_tile() + coord_fixed() 204 ## function without xaxt for plots with automatic x axis
239 + ggtitle("1) Order of Acquisition") 205 plot_colorByDensity = function(x1,x2,
240 +theme_bw() 206 ylim=c(min(x2),max(x2)),
241 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), 207 xlim=c(min(x1),max(x1)),
242 space = "Lab", na.value = "black", name = "Acq")) 208 xlab="",ylab="",main=""){
243 209
244 ## 2) Calibrant images: 210 df <- data.frame(x1,x2)
245 211 x <- densCols(x1,x2, colramp=colorRampPalette(c("black", "white")))
246 212 df\$dens <- col2rgb(x)[1,] + 1L
247 213 cols <- colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256)
248 if (length(inputmasses) != 0) 214 df\$col <- cols[df\$dens]
249 { for (mass in 1:length(inputmasses)) 215 plot(x2~x1, data=df[order(df\$dens),],
250 216 ylim=ylim,xlim=xlim,pch=20,col=col,
217 cex=1,xlab=xlab,ylab=ylab,las=1,
218 main=main)
219 }
220
221 ## Variables for plots
222 xrange = 1
223 yrange = 1
224 maxx = max(coord(msidata)[,1])+xrange
225 minx = min(coord(msidata)[,1])-xrange
226 maxy = max(coord(msidata)[,2])+yrange
227 miny = min(coord(msidata)[,2])-yrange
228
229 ############################################################################
230
231 ## 1) Acquisition image
232
233 pixelnumber = 1:pixelcount
234 pixelxyarray=cbind(coord(msidata),pixelnumber)
235
236 print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))
237 + geom_tile() + coord_fixed()
238 + ggtitle("1) Order of Acquisition")
239 +theme_bw()
240 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
241 space = "Lab", na.value = "black", name = "Acq"))
242
243 ## 2) Number of calibrants per spectrum
244
245 pixelmatrix = matrix(ncol=ncol(msidata), nrow=0)
246 inputcalibrantmasses = inputcalibrants[,1]
247
248 if (length(inputcalibrantmasses) != 0)
249 { for (calibrantnr in 1:length(inputcalibrantmasses))
250 {
251 calibrantmz = inputcalibrantmasses[calibrantnr]
252 calibrantfeaturemin = features(msidata, mz=calibrantmz-$plusminusinDalton)
253 calibrantfeaturemax = features(msidata, mz=calibrantmz+$plusminusinDalton)
254
255 if (calibrantfeaturemin == calibrantfeaturemax)
256 {
257
258 calibrantintensity = spectra(msidata)[calibrantfeaturemin,]
259
260 }else{
261
262 calibrantintensity = colSums(spectra(msidata)[calibrantfeaturemin:calibrantfeaturemax,] )
263
264 }
265 pixelmatrix = rbind(pixelmatrix, calibrantintensity)
266 }
267
268 countvector= as.factor(colSums(pixelmatrix>0))
269 countdf= cbind(coord(msidata), countvector)
270 mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrap1", "lightseagreen")
271
272 print(ggplot(countdf, aes(x=x, y=y, fill=countvector))
273 + geom_tile() + coord_fixed()
274 + ggtitle("2) Number of calibrants per pixel")
275 + theme_bw()
276 + theme(text=element_text(family="ArialMT", face="bold", size=12))
277 + scale_fill_manual(values = mycolours[1:length(countvector)],
278 na.value = "black", name = "# calibrants"))
279 }else{print("2) The inputcalibrant masses were outside the mass range")}
280
281
282 ############# new 2b) image of foldchanges (log2 intensity ratios) between two masses in the same spectrum
283
284 #if $calibrantratio:
285 #for $foldchanges in $calibrantratio:
286 mass1 = $foldchanges.mass1
287 mass2 = $foldchanges.mass2
288 distance = $foldchanges.distance
289
290 ### find rows which contain masses:
291
292 mzrowdown1 = features(msidata, mz = mass1-distance)
293 mzrowup1 = features(msidata, mz = mass1+distance)
294 mzrowdown2 = features(msidata, mz = mass2-distance)
295 mzrowup2 = features(msidata, mz = mass2+distance)
296
297 ### lower and upperlimit for the plot
298 mzdown1 = features(msidata, mz = mass1-2)
299 mzup1 = features(msidata, mz = mass1+3)
300 mzdown2 = features(msidata, mz = mass2-2)
301 mzup2 = features(msidata, mz = mass2+3)
302
303 ### plot the part which was chosen, with chosen value in blue, distance in blue, maxmass in red, xlim fixed to 5 Da window
304
305 if (mzrowdown1 == mzrowup1)
306 {
307 maxmassrow1 = spectra(msidata)[mzrowup1,]
308 maxmass1 = mz(msidata)[mzrowup1][which.max(maxmassrow1)]
309 }else{
310 maxmassrow1 = rowMeans(spectra(msidata)[mzrowdown1:mzrowup1,])
311 maxmass1 = mz(msidata)[mzrowdown1:mzrowup1][which.max(maxmassrow1)]
312 }
313 if (mzrowdown2 == mzrowup2)
314 {
315 maxmassrow2 = spectra(msidata)[mzrowup2,]
316 maxmass2 = mz(msidata)[mzrowup2][which.max(maxmassrow2)]
317 }else{
318 maxmassrow2 = rowMeans(spectra(msidata)[mzrowdown2:mzrowup2,])
319 maxmass2 = mz(msidata)[mzrowdown2:mzrowup2][which.max(maxmassrow2)]
320 }
321
322 par(mfrow=c(2,1), oma=c(0,0,2,0))
323 plot(msidata[mzdown1:mzup1,], pixel = 1:pixelcount, main=paste0("average spectrum ", mass1, " Da"))
324 abline(v=c(mass1-distance, mass1, mass1+distance), col="blue",lty=c(3,5,3))
325 abline(v=maxmass1, col="red", lty=5)
326
327 plot(msidata[mzdown2:mzup2,], pixel = 1:pixelcount, main= paste0("average spectrum ", mass2, " Da"))
328 abline(v=c(mass2-distance, mass2, mass2+distance), col="blue", lty=c(3,5,3))
329 abline(v=maxmass2, col="red", lty=5)
330 title("Control of fold change plot", outer=TRUE)
331
332 ### filter spectra for maxmass to have two vectors, which can be divided
333
334 mass1vector = spectra(msidata)[features(msidata, mz = maxmass1),]
335 mass2vector = spectra(msidata)[features(msidata, mz = maxmass2),]
336
337 foldchange = log2(mass1vector/mass2vector)
338
339 ratiomatrix = cbind(foldchange, coord(msidata))
340
341 print(ggplot(ratiomatrix, aes(x=x, y=y, fill=foldchange), colour=colo)
342 +scale_y_reverse() + geom_tile() + coord_fixed()
343 + ggtitle(paste0("Fold change ", mass1, " Da / ", mass2, " Da"))
344 + theme_bw()
345 + theme(text=element_text(family="ArialMT", face="bold", size=12))
346 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
347 ,space = "Lab", na.value = "black", name ="FC"))
348 #end for
349 #end if
350
351 ## 3) Calibrant images:
352
353 if (length(inputmasses) != 0)
354 { for (mass in 1:length(inputmasses))
355 {
356 image(msidata, mz=inputmasses[mass], plusminus=$plusminusinDalton,
357 main= paste0("3",LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2), " Da)"),
358 contrast.enhance = "histogram", ylim=c(maxy+1, 0))
359 }
360 } else {print("3) The inputpeptide masses were outside the mass range")}
361
362 ## 4) Number of peaks per pixel - image
363
364 peaksperpixel = colSums(spectra(msidata)[]> 0)
365 peakscoordarray=cbind(coord(msidata), peaksperpixel)
366
367 print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel), colour=colo)
368 + geom_tile() + coord_fixed()
369 + ggtitle("4) Number of peaks per pixel")
370 + theme_bw()
371 + theme(text=element_text(family="ArialMT", face="bold", size=12))
372 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
373 ,space = "Lab", na.value = "black", name = "# peaks"))
374
375 ## 5) TIC image
376 TICcoordarray=cbind(coord(msidata), TICs)
377 colo <- colorRampPalette(
378 c("blue", "cyan", "green", "yellow","red"))
379 print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo)
380 + geom_tile() + coord_fixed()
381 + ggtitle("5) Total Ion Chromatogram")
382 + theme_bw()
383 + theme(text=element_text(family="ArialMT", face="bold", size=12))
384 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
385 ,space = "Lab", na.value = "black", name = "TIC"))
386
387 ## 6) Most abundant mass image
388
389 highestmz = apply(spectra(msidata)[],2,which.max)
390 highestmz_matrix = cbind(coord(msidata),mz(msidata)[highestmz])
391 colnames(highestmz_matrix)[3] = "highestmzinDa"
392
393 print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))
394 + geom_tile() + coord_fixed()
395 + ggtitle("6) Most abundant m/z in each pixel")
396 + theme_bw()
397 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z",
398 labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]),
399 breaks = pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)], limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))
400 + theme(text=element_text(family="ArialMT", face="bold", size=12)))
401
402 ## which mz are highest
403 highestmz_peptides = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[1])
404 highestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == highestmz_peptides)[1]
405
406 secondhighestmz = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[2])
407 secondhighestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz)[1]
408
409 ## 7) pca image for two components
410 pca <- PCA(msidata, ncomp=2)
411 par(mfrow = c(2,1))
412 plot(pca, col=c("black", "darkgrey"), main="7) PCA for two components")
413 image(pca, col=c("black", "white"),ylim=c(maxy+1, 0))
414
415
416 ############################# III) properties over acquisition (spectra index)##########
417 ##############################################################################
418
419 par(mfrow = c(2,1), mar=c(5,6,4,2))
420
421 ## 8a) number of peaks per spectrum - scatterplot
422 plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="8a) Number of peaks per spectrum")
423 title(xlab="Spectra index \n (= Acquisition time)", line=3)
424 title(ylab="Number of peaks", line=4)
425
426 ## 8b) number of peaks per spectrum - histogram
427 hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="")
428 title(main="8b) Number of peaks per spectrum", line=2)
429 title(ylab="Frequency = # spectra", line=4)
430 abline(v=median(peaksperpixel), col="blue")
431
432 ## 9a) TIC per spectrum - density scatterplot
433 zero=0
434 par(mfrow = c(2,1), mar=c(5,6,4,2))
435 plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="9a) TIC per pixel")
436 title(xlab="Spectra index \n (= Acquisition time)", line=3)
437 title(ylab = "Total ion chromatogram intensity", line=4)
438
439 ## 9b) TIC per spectrum - histogram
440 hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
441 title(main= "9b) TIC per spectrum", line=2)
442 title(ylab="Frequency = # spectra", line=4)
443 abline(v=median(log(TICs[TICs>0])), col="blue")
444
445
446 ## 10) intensity of chosen peptides over acquisition (pixel index)
447
448 if (length(inputcalibrants[,1]) != 0)
449 {
450 par(mfrow = c(3, 2), oma=c(0,0,2,0))
451 intensityvector = vector()
452 for (mzvalue in 1:length(inputcalibrants[,1]))
453 {
454 mznumber = features(msidata, mz = inputcalibrants[,1][mzvalue])
455 intensityvector = spectra(msidata)[][mznumber,]
456 plot(intensityvector, main=inputnames[mzvalue], xlab="Spectra index \n (= Acquisition time)")
457 }
458 title("10) intensity of calibrants over acquisition", outer=TRUE)
459 }else{print("10) The inputcalibrant masses were outside the mass range")}
460
461 ################################## IV) changes over mz ############################
462 ###################################################################################
463
464 ## 11) Number of peaks per mz
465 ## Number of peaks per mz - number across all pixel
466 peakspermz = rowSums(spectra(msidata)[] > 0 )
467
468 par(mfrow = c(2,1), mar=c(5,6,4,4.5))
469 ## 11a) Number of peaks per mz - scatterplot
470 plot_colorByDensity(mz(msidata),peakspermz, main= "11a) Number of peaks for each mz", ylab ="")
471 title(xlab="mz in Dalton", line=2.5)
472 title(ylab = "Number of peaks", line=4)
473 axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1)
474 mtext("Coverage of spectra [%]", 4, line=3, adj=1)
475
476 # make plot smaller to fit axis and labels, add second y axis with %
477 ## 11b) Number of peaks per mz - histogram
478 hist(peakspermz, main="", las=1, ylab="")
479 title(ylab = "Frequency", line=4)
480 title(main="11b) Number of peaks per mz", xlab = "Number of peaks per mz", line=2)
481 abline(v=median(peakspermz), col="blue")
482
483
484 ## 12) Sum of intensities per mz
485
486 ## Sum of all intensities for each mz (like TIC, but for mz instead of pixel)
487 mzTIC = rowSums(spectra(msidata)[]) # calculate intensity sum for each mz
488
489 par(mfrow = c(2,1), mar=c(5,6,4,2))
490 # 12a) sum of intensities per mz - scatterplot
491 plot_colorByDensity(mz(msidata),mzTIC, main= "12a) Sum of all peak intensities for each mz", ylab ="")
492 title(xlab="mz in Dalton", line=2.5)
493 title(ylab="Intensity sum", line=4)
494 # 12b) sum of intensities per mz - histogram
495 hist(log(mzTIC), main="", xlab = "", las=1, ylab="")
496 title(main="12b) Sum of intensities per mz", line=2, ylab="")
497 title(xlab = "log (sum of intensities per mz)")
498 title(ylab = "Frequency", line=4)
499 abline(v=median(log(mzTIC[mzTIC>0])), col="blue")
500
501 ################################## V) general plots ############################
502 ###################################################################################
503
504 ## 13) Intensity distribution
505
506 par(mfrow = c(2,1), mar=c(5,6,4,2))
507
508 ## 13a) Intensity histogram:
509 hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
510 title(main="13a) Log2-transformed intensities", line=2)
511 title(xlab="log2 intensities")
512 title(ylab="Frequency", line=4)
513 abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue")
514
515 ## 13b) Median intensity over spectra
516 medianint_spectra = apply(spectra(msidata), 2, median)
517 plot(medianint_spectra, main="13b) Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="")
518 title(ylab="Median spectrum intensity", line=4)
519
520 ## 14) Mass spectra
521
522 par(mfrow = c(2, 2))
523 plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum")
524 plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition")
525 plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,])))
526 plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,])))
527
528 ## 15) Zoomed in mass spectra for calibrants
529 plusminusvalue = $plusminusinDalton
530 x = 1
531 if (length(inputcalibrantmasses) != 0)
251 { 532 {
252 image(msidata, mz=inputmasses[mass], plusminus=$plusminusinDalton, 533
253 main= paste0("2",LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2), " Da)"), 534 for (calibrant in inputcalibrantmasses)
254 contrast.enhance = "histogram") 535 {
255 } 536 minmasspixel = features(msidata, mz=calibrant-1)
256 } else {print("The inputpeptide masses were outside the mass range")} 537 maxmasspixel = features(msidata, mz=calibrant+3)
257 538 par(mfrow = c(2, 2), oma=c(0,0,2,0))
258 ## 3) Number of peaks per pixel - image 539 plot(msidata[minmasspixel:maxmasspixel,], pixel = 1:length(pixelnumber), main= "average spectrum")
259 540 abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
260 (ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel), colour=colo) 541 plot(msidata[minmasspixel:maxmasspixel,], pixel =round(pixelnumber/2, digits=0), main="pixel in middle of acquisition")
261 +scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed() 542 abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
262 + ggtitle("3) Number of peaks per pixel") 543 plot(msidata[minmasspixel:maxmasspixel,], pixel = highestmz_pixel,main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,])))
263 + theme_bw() 544 abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
264 + theme(text=element_text(family="ArialMT", face="bold", size=12)) 545 plot(msidata[minmasspixel:maxmasspixel,], pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,])))
265 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 546 abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
266 ,space = "Lab", na.value = "black", name = "# peaks")) 547 title(paste0(inputcalibrants[x,1]), outer=TRUE)
267 548 x=x+1
268 549 }
269 ## 4) TIC image 550
270 TICcoordarray=cbind(coord(msidata), TICs) 551 }else{print("15) The inputcalibrant masses were outside the mass range")}
271 colo <- colorRampPalette(
272 c('blue', 'cyan', 'green', 'yellow','red'))
273 (ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo)
274 +scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed()
275 + ggtitle("4) Total Ion Chromatogram")
276 + theme_bw()
277 + theme(text=element_text(family="ArialMT", face="bold", size=12))
278 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
279 ,space = "Lab", na.value = "black", name = "TIC"))
280
281 ## 5) Most abundant mass image
282
283 (ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))
284 +scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed()
285 + ggtitle("5) Most abundant m/z in each pixel")
286 + theme_bw()
287 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z",
288 labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]),
289 breaks = pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)], limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))
290 + theme(text=element_text(family="ArialMT", face="bold", size=12)))
291
292 ## which mz are highest
293 highestmz_peptides = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[1])
294 highestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == highestmz_peptides)[1]
295
296 secondhighestmz = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[2])
297 secondhighestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz)[1]
298
299
300
301 ## 6) pca image for two components
302 pca <- PCA(msidata, ncomp=2)
303 par(mfrow = c(2,1))
304 plot(pca, col=c("black", "darkgrey"), main="6) PCA for two components")
305 image(pca, ylim = c(-1, maxy), col=c("black", "white"))
306
307
308 ############################# III) properties over acquisition (spectra index)##########
309 ##############################################################################
310
311 par(mfrow = c(2,1), mar=c(5,6,4,2))
312
313 ## 7a) number of peaks per spectrum - scatterplot
314 plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="7a) Number of peaks per spectrum")
315 title(xlab="Spectra index \n (= Acquisition time)", line=3)
316 title(ylab="Number of peaks", line=4)
317
318 ## 7b) number of peaks per spectrum - histogram
319 hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="")
320 title(main="7b) Number of peaks per spectrum", line=2)
321 title(ylab="Frequency = # spectra", line=4)
322 abline(v=median(peaksperpixel), col="blue")
323
324 ## 8a) TIC per spectrum - density scatterplot
325 zero=0
326 par(mfrow = c(2,1), mar=c(5,6,4,2))
327 plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="8a) TIC per pixel")
328 title(xlab="Spectra index \n (= Acquisition time)", line=3)
329 title(ylab = "Total ion chromatogram intensity", line=4)
330
331 ## 8b) TIC per spectrum - histogram
332 hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
333 title(main= "8b) TIC per spectrum", line=2)
334 title(ylab="Frequency = # spectra", line=4)
335 abline(v=median(log(TICs[TICs>0])), col="blue")
336
337
338 ## 9) intensity of chosen peptides over acquisition (pixel index)
339
340 if (length(inputmasses) != 0)
341 {
342
343 par(mfrow = c(3, 2))
344 intensityvector = vector()
345 for (mass in 1:length(inputmasses))
346 {
347 mznumber = features(msidata, mz = inputmasses[mass])
348 intensityvector = spectra(msidata)[][mznumber,]
349 plot(intensityvector, main=inputnames[mass], xlab="Spectra index \n (= Acquisition time)")
350 }
351 } else {print("The inputpeptide masses were outside the mass range")}
352
353 ################################## IV) changes over mz ############################
354 ###################################################################################
355
356 ## 10) Number of peaks per mz
357
358 par(mfrow = c(2,1), mar=c(5,6,4,4.5))
359 ## 10a) Number of peaks per mz - scatterplot
360 plot_colorByDensity(mz(msidata),peakspermz, main= "10a) Number of peaks for each mz", ylab ="")
361 title(xlab="mz in Dalton", line=2.5)
362 title(ylab = "Number of peaks", line=4)
363 axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1)
364 mtext("Coverage of spectra [%]", 4, line=3, adj=1)
365
366 # make plot smaller to fit axis and labels, add second y axis with %
367 ## 10b) Number of peaks per mz - histogram
368 hist(peakspermz, main="", las=1, ylab="", xlab="")
369 title(ylab = "Frequency", line=4)
370 title(main="10b) Number of peaks per mz", xlab = "Number of peaks per mz", line=2)
371 abline(v=median(peakspermz), col="blue")
372
373
374 ## 11) Sum of intensities per mz
375
376 par(mfrow = c(2,1), mar=c(5,6,4,2))
377 # 11a) sum of intensities per mz - scatterplot
378 plot_colorByDensity(mz(msidata),mzTIC, main= "11a) Sum of all peak intensities for each mz", ylab ="")
379 title(xlab="mz in Dalton", line=2.5)
380 title(ylab="Intensity sum", line=4)
381 # 11b) sum of intensities per mz - histogram
382 hist(log(mzTIC), main="", xlab = "", las=1, ylab="")
383 title(main="11b) Sum of intensities per mz", line=2, ylab="")
384 title(xlab = "log (sum of intensities per mz)")
385 title(ylab = "Frequency", line=4)
386 abline(v=median(log(mzTIC[mzTIC>0])), col="blue")
387
388
389
390 ################################## V) general plots ############################
391 ###################################################################################
392
393
394 ## 12) Intensity distribution
395
396 par(mfrow = c(2,1), mar=c(5,6,4,2))
397
398 ## 12a) Intensity histogram:
399 hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
400 title(main="12a) Log2-transformed intensities", line=2)
401 title(xlab="log2 intensities")
402 title(ylab="Frequency", line=4)
403 abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue")
404
405 ## 12b) Median intensity over spectra
406 medianint_spectra = apply(spectra(msidata), 2, median)
407 plot(medianint_spectra, main="12b) Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="")
408 title(ylab="Median spectrum intensity", line=4)
409
410 ## 13) Mass spectra
411
412 par(mfrow = c(2, 2))
413 plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum")
414 plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition")
415 plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,])))
416 plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,])))
417 552
418 dev.off() 553 dev.off()
554 }else{
555 print("inputfile has no intensities > 0")
556 dev.off()
557 }
419 558
420 ]]></configfile> 559 ]]></configfile>
421 </configfiles> 560 </configfiles>
422 <inputs> 561 <inputs>
423 <param name="infile" type="data" format="imzml, rdata, analyze75" label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" 562 <param name="infile" type="data" format="imzml, rdata, analyze75" label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
424 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> 563 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
425 <param name="filename" type="text" value="" optional="true" label="Title" help="will appear in the quality report. If nothing given it will take the dataset name."/> 564 <param name="filename" type="text" value="" optional="true" label="Title" help="will appear in the quality report. If nothing given it will take the dataset name."/>
426 <param name="inputpeptidefile" type="data" optional="true" format="txt, csv" label="Text file with peptidemasses and names" 565 <param name="inputpeptidefile" type="data" optional="true" format="txt, csv" label="Text file with peptidemasses and names"
427 help="first column peptide m/z, second column peptide name, tab separated file"/> 566 help="first column peptide m/z, second column peptide name, tab separated file"/>
567 <param name="inputcalibrants" type="data" optional="true" format="txt,csv"
568 label="Internal calibrants"
569 help="Used for plot number of calibrant per spectrum and for zoomed in mass spectra"/>
428 <param name="plusminusinDalton" value="0.25" type="text" label="Mass range" help="plusminus mass window in Dalton"/> 570 <param name="plusminusinDalton" value="0.25" type="text" label="Mass range" help="plusminus mass window in Dalton"/>
571 <repeat name="calibrantratio" title="Plot fold change of two masses for each spectrum" min="0" max="10">
572 <param name="mass1" value="1111" type="float" label="Mass 1" help="First mass in Dalton"/>
573 <param name="mass2" value="2222" type="float" label="Mass 2" help="Second mass in Dalton"/>
574 <param name="distance" value="0.25" type="float" label="Distance in Dalton" help="Distance in Da used to find peak maximum from input masses in both directions"/>
575 </repeat>
429 </inputs> 576 </inputs>
430 <outputs> 577 <outputs>
431 <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label="${tool.name} on $infile.display_name"/> 578 <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label = "${tool.name} on $infile.display_name"/>
432 </outputs> 579 </outputs>
580
433 <tests> 581 <tests>
434 <test> 582 <test>
435 <param name="infile" value="" ftype="imzml"> 583 <param name="infile" value="" ftype="imzml">
436 <composite_data value="Example_Continuous.imzML" ftype="imzml"/> 584 <composite_data value="Example_Continuous.imzML" />
437 <composite_data value="Example_Continuous.ibd" ftype="ibd"/> 585 <composite_data value="Example_Continuous.ibd" />
438 </param> 586 </param>
439 <param name="inputpeptidefile" value="inputpeptides.csv" ftype="csv"/> 587 <param name="inputpeptidefile" value="inputpeptides.csv" ftype="csv"/>
588 <param name="inputcalibrants" ftype="txt" value="inputcalibrantfile1.txt"/>
440 <param name="plusminusinDalton" value="0.25"/> 589 <param name="plusminusinDalton" value="0.25"/>
441 <param name="filename" value="Testfile_imzml"/> 590 <param name="filename" value="Testfile_imzml"/>
591 <repeat name="calibrantratio">
592 <param name="mass1" value="111"/>
593 <param name="mass2" value="222"/>
594 <param name="distance" value="0.25"/>
595 </repeat>
442 <output name="plots" file="Testfile_qualitycontrol_imzml.pdf" compare="sim_size" delta="20000"/> 596 <output name="plots" file="Testfile_qualitycontrol_imzml.pdf" compare="sim_size" delta="20000"/>
443 </test> 597 </test>
598
444 <test> 599 <test>
445 <param name="infile" value="" ftype="analyze75"> 600 <param name="infile" value="" ftype="analyze75">
446 <composite_data value="Analyze75.hdr" ftype="hdr"/> 601 <composite_data value="Analyze75.hdr"/>
447 <composite_data value="Analyze75.img" ftype="img"/> 602 <composite_data value="Analyze75.img"/>
448 <composite_data value="Analyze75.t2m" ftype="t2m"/> 603 <composite_data value="Analyze75.t2m"/>
449 </param> 604 </param>
450 <param name="inputpeptidefile" value="inputpeptides.txt" ftype="txt"/> 605 <param name="inputpeptidefile" value="inputpeptides.txt" ftype="txt"/>
606 <param name="inputcalibrants" ftype="txt" value="inputcalibrantfile2.txt"/>
451 <param name="plusminusinDalton" value="0.5"/> 607 <param name="plusminusinDalton" value="0.5"/>
452 <param name="filename" value="Testfile_analyze75"/> 608 <param name="filename" value="Testfile_analyze75"/>
453 <output name="plots" file="Testfile_qualitycontrol_analyze75.pdf" compare="sim_size" delta="20000"/> 609 <output name="plots" file="Testfile_qualitycontrol_analyze75.pdf" compare="sim_size" delta="20000"/>
454 </test> 610 </test>
611
455 <test> 612 <test>
456 <param name="infile" value="example_continousS042.RData" ftype="rdata"/> 613 <param name="infile" value="preprocessing_results1.RData" ftype="rdata"/>
457 <param name="inputpeptidefile" value="inputpeptides.csv" ftype="txt"/> 614 <param name="inputpeptidefile" value="inputpeptides.csv" ftype="txt"/>
615 <param name="inputcalibrants" ftype="txt" value="inputcalibrantfile1.txt"/>
458 <param name="plusminusinDalton" value="0.1"/> 616 <param name="plusminusinDalton" value="0.1"/>
459 <param name="filename" value="Testfile_rdata"/> 617 <param name="filename" value="Testfile_rdata"/>
460 <output name="plots" file="Testfile_qualitycontrol_rdata.pdf" compare="sim_size" delta="20000"/> 618 <output name="plots" file="Testfile_qualitycontrol_rdata.pdf" compare="sim_size" delta="20000"/>
461 </test> 619 </test>
620 <test>
621 <param name="infile" value="LM8_file16.rdata" ftype="rdata"/>
622 <param name="inputpeptidefile" value="inputpeptides.txt" ftype="txt"/>
623 <param name="inputcalibrants" ftype="txt" value="inputcalibrantfile2.txt"/>
624 <param name="plusminusinDalton" value="0.1"/>
625 <param name="filename" value="Testfile_rdata"/>
626 <output name="plots" file="LM8_file16output.pdf" compare="sim_size" delta="20000"/>
627 </test>
462 </tests> 628 </tests>
463 <help> 629 <help>
464 <![CDATA[ 630 <![CDATA[
465 Quality control for MALDI imaging mass spectrometry data. 631 Quality control for MALDI imaging mass spectrometry data.
632
466 633
467 Input data: 3 types of input data can be used: 634 Input data: 3 types of input data can be used:
468 635
469 - imzML file (upload the imzML and ibd files via the "composite" function) `Introduction to the imzML format <http://ms-imaging.org/wp/introduction/>`_ 636 - imzML file (upload the imzML and ibd files via the "composite" function) `Introduction to the imzML format <http://ms-imaging.org/wp/introduction/>`_
470 - Analyze7.5 (upload the hdr, img and t2m files via the "composite" function) 637 - Analyze7.5 (upload the hdr, img and t2m files via the "composite" function)
471 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) 638 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
472
473 Only for continuous imzML so far.
474 639
475 The output of this tool is a PDF report containing key values and plots of the imaging data. 640 The output of this tool is a PDF report containing key values and plots of the imaging data.
476 641
477 ]]> 642 ]]>
478 </help> 643 </help>
479 <citations> 644 <citations>
480 <citation type="doi">10.1093/bioinformatics/btv146</citation> 645 <citation type="doi">10.1093/bioinformatics/btv146</citation>
481 </citations> 646 </citations>
482 </tool> 647 </tool>
648