comparison msi_qualitycontrol.xml @ 0:845073d506a8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_qualitycontrol commit fa798afa023eea1cb183c14d0242721b2c696c21
author galaxyp
date Tue, 31 Oct 2017 06:00:03 -0400
parents
children c6bc77c4731d
comparison
equal deleted inserted replaced
-1:000000000000 0:845073d506a8
1 <tool id="Mass_spectrometry_imaging_QC" name="MSI Qualitycontrol" version="1.7.0">
2 <description>
3 mass spectrometry imaging QC
4 </description>
5 <requirements> <!-- packages loaded via library() in the R configfile -->
6 <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
7 <requirement type="package" version="2.2.1">r-ggplot2</requirement>
8 <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
9 <requirement type="package" version="2.2.1">r-gridextra</requirement>
10 <requirement type="package" version="2.23_15">r-kernsmooth</requirement>
11 </requirements>
12 <command detect_errors="exit_code">
13 <![CDATA[
14
15 #if $infile.ext == 'imzml'
16 cp '${infile.extra_files_path}/imzml' infile.imzML &&
17 cp '${infile.extra_files_path}/ibd' infile.ibd &&
18 #elif $infile.ext == 'analyze75'
19 cp '${infile.extra_files_path}/hdr' infile.hdr &&
20 cp '${infile.extra_files_path}/img' infile.img &&
21 cp '${infile.extra_files_path}/t2m' infile.t2m &&
22 #else
23 ln -s '$infile' infile.RData &&
24 #end if
25 cat '${cardinal_qualitycontrol_script}' &&
26 Rscript '${cardinal_qualitycontrol_script}'
27 ]]>
28 </command>
29 <configfiles>
30 <configfile name="cardinal_qualitycontrol_script"><![CDATA[
31
32 ################################# load libraries and read file #########################
33 library(Cardinal)
34 library(ggplot2)
35 library(RColorBrewer)
36 library(gridExtra)
37 library(KernSmooth)
38
39 ## Read MALDI imaging dataset
40
41 #if $infile.ext == 'imzml'
42 msidata <- readMSIData('infile.imzML')
43 #elif $infile.ext == 'analyze75'
44 msidata <- readMSIData('infile.hdr')
45
46 #else
47 load('infile.RData')
48 #end if
49
50 #if $inputpeptidefile:
51 ## Read tabular file with peptide masses for plots and heatmap images:
52 input_list = read.delim("$inputpeptidefile", header = FALSE, na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE)
53 #else
54 input_list = data.frame(0, 0)
55 #end if
56
57 ###################################### file properties in numbers ######################
58
59 ## Number of features (mz)
60 maxfeatures = length(features(msidata))
61 ## Range mz
62 minmz = round(min(mz(msidata)), digits=2)
63 maxmz = round(max(mz(msidata)), digits=2)
64 ## Number of spectra (pixels)
65 pixelcount = length(pixels(msidata))
66 ## Range x coordinates
67 minimumx = min(coord(msidata)[,1])
68 maximumx = max(coord(msidata)[,1])
69 ## Range y coordinates
70 minimumy = min(coord(msidata)[,2])
71 maximumy = max(coord(msidata)[,2])
72 ## Range of intensities
73 minint = round(min(spectra(msidata)[]), digits=2)
74 maxint = round(max(spectra(msidata)[]), digits=2)
75 medint = round(median(spectra(msidata)[]), digits=2)
76 ## Number of intensities > 0
77 npeaks= sum(spectra(msidata)[]>0)
78 ## Spectra multiplied with mz (potential number of peaks)
79 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
80 ## Percentage of intensities > 0
81 percpeaks = round(npeaks/numpeaks*100, digits=2)
82 ## Number of empty TICs
83 TICs = colSums(spectra(msidata)[])
84 NumemptyTIC = sum(TICs == 0)
85
86 ## Processing information
87 processinginfo = processingData(msidata)
88 centroidedinfo = processinginfo@centroided # TRUE or FALSE
89
90 ## for each preprocessing step: report the stored processing info, or 'FALSE' if that slot is empty
91
92 ## normalization
93 if (length(processinginfo@normalization) == 0) {
94 normalizationinfo='FALSE'
95 } else {
96 normalizationinfo=processinginfo@normalization
97 }
98 ## smoothing
99 if (length(processinginfo@smoothing) == 0) {
100 smoothinginfo='FALSE'
101 } else {
102 smoothinginfo=processinginfo@smoothing
103 }
104 ## baseline
105 if (length(processinginfo@baselineReduction) == 0) {
106 baselinereductioninfo='FALSE'
107 } else {
108 baselinereductioninfo=processinginfo@baselineReduction
109 }
110 ## peak picking
111 if (length(processinginfo@peakPicking) == 0) {
112 peakpickinginfo='FALSE'
113 } else {
114 peakpickinginfo=processinginfo@peakPicking
115 }
116
117
118 ## keep only the input peptide masses strictly inside the measured m/z range; which() drops NA masses instead of returning all-NA rows
119 inputpeptides = input_list[which(input_list[,1]>minmz & input_list[,1]<maxmz),]
120 inputmasses = inputpeptides[,1]
121 inputnames = inputpeptides[,2]
122
123 #############################################################################
124
125 properties = c("Number of mz features",
126 "Range of mz values [Da]",
127 "Number of pixels",
128 "Range of x coordinates",
129 "Range of y coordinates",
130 "Range of intensities",
131 "Median of intensities",
132 "Intensities > 0",
133 "Number of zero TICs",
134 "Preprocessing",
135 "Normalization",
136 "Smoothing",
137 "Baseline reduction",
138 "Peak picking",
139 "Centroided",
140 "# valid peptidemasses")
141
142 values = c(paste0(maxfeatures),
143 paste0(minmz, " - ", maxmz),
144 paste0(pixelcount),
145 paste0(minimumx, " - ", maximumx),
146 paste0(minimumy, " - ", maximumy),
147 paste0(minint, " - ", maxint),
148 paste0(medint),
149 paste0(percpeaks, " %"),
150 paste0(NumemptyTIC),
151 paste0(" "),
152 paste0(normalizationinfo),
153 paste0(smoothinginfo),
154 paste0(baselinereductioninfo),
155 paste0(peakpickinginfo),
156 paste0(centroidedinfo),
157 paste0(length(inputmasses)))
158
159
160 property_df = data.frame(properties, values)
161
162
163 ## Variables for plots
164 xrange = 1
165 yrange = 1
166 maxx = max(coord(msidata)[,1])+xrange
167 minx = min(coord(msidata)[,1])-xrange
168 maxy = max(coord(msidata)[,2])+yrange
169 miny = min(coord(msidata)[,2])-yrange
170
171
172 ####################################### Preparation of images #########################
173
174 ## Acquisitionorder
175
176 pixelnumber = 1:pixelcount
177 pixelxyarray=cbind(coord(msidata),pixelnumber)
178
179
180 ## Number of peaks per pixel
181 peaksperpixel = colSums(spectra(msidata)[]> 0)
182 peakscoordarray=cbind(coord(msidata), peaksperpixel)
183
184 ## Most abundant mz
185
186 highestmz = apply(spectra(msidata)[],2,which.max)
187 highestmz_matrix = cbind(coord(msidata),mz(msidata)[highestmz])
188 colnames(highestmz_matrix)[3] = "highestmzinDa"
189
190 ###################################### Preparation of plots ############################
191
192 ## function without xaxt for plots with automatic x axis
193 plot_colorByDensity = function(x1, x2,
194 ylim = c(min(x2), max(x2)),
195 xlim = c(min(x1), max(x1)),
196 xlab = "", ylab = "", main = "") {
197 ## scatterplot of x2 against x1 in which every point is coloured by the local point density
198 pts <- data.frame(x1, x2)
199 ## densCols encodes the density as a black-to-white ramp; the red channel (0-255) + 1 is used as palette index
200 greys <- densCols(x1, x2, colramp = colorRampPalette(c("black", "white")))
201 pts\$dens <- col2rgb(greys)[1,] + 1L
202 heat <- colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256)
203 pts\$col <- heat[pts\$dens]
204 ## plot in order of increasing density so that points from dense regions are drawn last
205 plot(x2 ~ x1, data = pts[order(pts\$dens),], ylim = ylim, xlim = xlim, pch = 20, col = col,
206 cex = 1, xlab = xlab, ylab = ylab, las = 1, main = main)
207 }
208
209 ## Number of peaks per mz - number across all pixel
210 peakspermz = rowSums(spectra(msidata)[] > 0 )
211
212 ## Sum of all intensities for each mz (like TIC, but for mz instead of pixel)
213 mzTIC = rowSums(spectra(msidata)[]) # calculate intensity sum for each mz
214
215
216
217 ######################################## PDF #############################################
218 ##########################################################################################
219 ##########################################################################################
220
221
222 pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12)
223 plot(0,type='n',axes=FALSE,ann=FALSE)
224 #if not $filename:
225 #set $filename = $infile.display_name
226 #end if
227 title(main=paste("Quality control of MSI data\n\n", "Filename:", "$filename"))
228
229 ############################# I) numbers ####################################
230 #############################################################################
231 grid.table(property_df, rows= NULL)
232
233 ############################# II) ion images #################################
234 ##############################################################################
235
236 ## 1) Acquisition image
237 (ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))
238 +scale_y_reverse() + geom_tile() + coord_fixed()
239 + ggtitle("1) Order of Acquisition")
240 +theme_bw()
241 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
242 space = "Lab", na.value = "black", name = "Acq"))
243
244 ## 2) Calibrant images:
245
246
247
248 if (length(inputmasses) != 0)
249 { for (mass in 1:length(inputmasses))
250
251 {
252 image(msidata, mz=inputmasses[mass], plusminus=$plusminusinDalton,
253 main= paste0("2",LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2), " Da)"),
254 contrast.enhance = "histogram")
255 }
256 } else {print("The inputpeptide masses were outside the mass range")}
257
258 ## 3) Number of peaks per pixel - image
259 ## tile plot of the number of intensities > 0 in each pixel; y axis is reversed and limited to maxy/miny
260 (ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel))
261 +scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed()
262 + ggtitle("3) Number of peaks per pixel")
263 + theme_bw()
264 + theme(text=element_text(family="ArialMT", face="bold", size=12))
265 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
266 ,space = "Lab", na.value = "black", name = "# peaks"))
267
268
269 ## 4) TIC image
270 TICcoordarray=cbind(coord(msidata), TICs)
271 colo <- colorRampPalette(
272 c('blue', 'cyan', 'green', 'yellow','red'))
273 (ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo)
274 +scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed()
275 + ggtitle("4) Total Ion Chromatogram")
276 + theme_bw()
277 + theme(text=element_text(family="ArialMT", face="bold", size=12))
278 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
279 ,space = "Lab", na.value = "black", name = "TIC"))
280
281 ## 5) Most abundant mass image
282
283 (ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))
284 +scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed()
285 + ggtitle("5) Most abundant m/z in each pixel")
286 + theme_bw()
287 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z",
288 labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]),
289 breaks = pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)], limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))
290 + theme(text=element_text(family="ArialMT", face="bold", size=12)))
291
292 ## which mz are highest (m/z rounded to integers, ranked by the number of pixels in which they are the most abundant)
293 highestmz_peptides = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[1])
294 highestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == highestmz_peptides)[1]
295 ## if all pixels share one most abundant m/z there is no runner-up (NA); highestmz_pixel is then reused so that the spectra plots in 13) do not fail
296 secondhighestmz = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[2])
297 secondhighestmz_pixel = c(which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz), highestmz_pixel)[1]
298
299
300
301 ## 6) pca image for two components
302 pca <- PCA(msidata, ncomp=2)
303 par(mfrow = c(2,1))
304 plot(pca, col=c("black", "darkgrey"), main="6) PCA for two components")
305 image(pca, ylim = c(-1, maxy), col=c("black", "white"))
306
307
308 ############################# III) properties over acquisition (spectra index)##########
309 ##############################################################################
310
311 par(mfrow = c(2,1), mar=c(5,6,4,2))
312
313 ## 7a) number of peaks per spectrum - scatterplot
314 plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="7a) Number of peaks per spectrum")
315 title(xlab="Spectra index \n (= Acquisition time)", line=3)
316 title(ylab="Number of peaks", line=4)
317
318 ## 7b) number of peaks per spectrum - histogram
319 hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="")
320 title(main="7b) Number of peaks per spectrum", line=2)
321 title(ylab="Frequency = # spectra", line=4)
322 abline(v=median(peaksperpixel), col="blue")
323
324 ## 8a) TIC per spectrum - density scatterplot
325 zero=0
326 par(mfrow = c(2,1), mar=c(5,6,4,2))
327 plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="8a) TIC per pixel")
328 title(xlab="Spectra index \n (= Acquisition time)", line=3)
329 title(ylab = "Total ion chromatogram intensity", line=4)
330
331 ## 8b) TIC per spectrum - histogram
332 hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
333 title(main= "8b) TIC per spectrum", line=2)
334 title(ylab="Frequency = # spectra", line=4)
335 abline(v=median(log(TICs[TICs>0])), col="blue")
336
337
338 ## 9) intensity of chosen peptides over acquisition (pixel index)
339
340 if (length(inputmasses) != 0)
341 {
342
343 par(mfrow = c(3, 2))
344 intensityvector = vector()
345 for (mass in 1:length(inputmasses))
346 {
347 mznumber = features(msidata, mz = inputmasses[mass])
348 intensityvector = spectra(msidata)[][mznumber,]
349 plot(intensityvector, main=inputnames[mass], xlab="Spectra index \n (= Acquisition time)")
350 }
351 } else {print("The inputpeptide masses were outside the mass range")}
352
353 ################################## IV) changes over mz ############################
354 ###################################################################################
355
356 ## 10) Number of peaks per mz
357
358 par(mfrow = c(2,1), mar=c(5,6,4,4.5))
359 ## 10a) Number of peaks per mz - scatterplot
360 plot_colorByDensity(mz(msidata),peakspermz, main= "10a) Number of peaks for each mz", ylab ="")
361 title(xlab="mz in Dalton", line=2.5)
362 title(ylab = "Number of peaks", line=4)
363 axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1)
364 mtext("Coverage of spectra [%]", 4, line=3, adj=1)
365
366 # make plot smaller to fit axis and labels, add second y axis with %
367 ## 10b) Number of peaks per mz - histogram
368 hist(peakspermz, main="", las=1, ylab="", xlab="")
369 title(ylab = "Frequency", line=4)
370 title(main="10b) Number of peaks per mz", xlab = "Number of peaks per mz", line=2)
371 abline(v=median(peakspermz), col="blue")
372
373
374 ## 11) Sum of intensities per mz
375
376 par(mfrow = c(2,1), mar=c(5,6,4,2))
377 # 11a) sum of intensities per mz - scatterplot
378 plot_colorByDensity(mz(msidata),mzTIC, main= "11a) Sum of all peak intensities for each mz", ylab ="")
379 title(xlab="mz in Dalton", line=2.5)
380 title(ylab="Intensity sum", line=4)
381 # 11b) sum of intensities per mz - histogram
382 hist(log(mzTIC), main="", xlab = "", las=1, ylab="")
383 title(main="11b) Sum of intensities per mz", line=2, ylab="")
384 title(xlab = "log (sum of intensities per mz)")
385 title(ylab = "Frequency", line=4)
386 abline(v=median(log(mzTIC[mzTIC>0])), col="blue")
387
388
389
390 ################################## V) general plots ############################
391 ###################################################################################
392
393
394 ## 12) Intensity distribution
395
396 par(mfrow = c(2,1), mar=c(5,6,4,2))
397
398 ## 12a) Intensity histogram:
399 hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
400 title(main="12a) Log2-transformed intensities", line=2)
401 title(xlab="log2 intensities")
402 title(ylab="Frequency", line=4)
403 abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue")
404
405 ## 12b) Median intensity over spectra
406 medianint_spectra = apply(spectra(msidata), 2, median)
407 plot(medianint_spectra, main="12b) Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="")
408 title(ylab="Median spectrum intensity", line=4)
409
410 ## 13) Mass spectra
411
412 par(mfrow = c(2, 2))
413 plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum")
414 plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition")
415 plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,])))
416 plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,])))
417
418 dev.off()
419
420 ]]></configfile>
421 </configfiles>
422 <inputs>
423 <param name="infile" type="data" format="imzml, rdata, analyze75" label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
424 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
425 <param name="filename" type="text" value="" optional="true" label="Title" help="will appear in the quality report. If nothing given it will take the dataset name."/>
426 <param name="inputpeptidefile" type="data" optional="true" format="txt, csv" label="Text file with peptidemasses and names"
427 help="first column peptide m/z, second column peptide name, tab separated file"/>
428 <param name="plusminusinDalton" value="0.25" type="float" label="Mass range" help="plusminus mass window in Dalton"/> <!-- float: the value is inserted unquoted into the R call image(..., plusminus=...) -->
429 </inputs>
430 <outputs>
431 <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label="${tool.name} on $infile.display_name"/>
432 </outputs>
433 <tests>
434 <test>
435 <param name="infile" value="" ftype="imzml">
436 <composite_data value="Example_Continuous.imzML" ftype="imzml"/>
437 <composite_data value="Example_Continuous.ibd" ftype="ibd"/>
438 </param>
439 <param name="inputpeptidefile" value="inputpeptides.csv" ftype="csv"/>
440 <param name="plusminusinDalton" value="0.25"/>
441 <param name="filename" value="Testfile_imzml"/>
442 <output name="plots" file="Testfile_qualitycontrol_imzml.pdf" compare="sim_size" delta="20000"/>
443 </test>
444 <test>
445 <param name="infile" value="" ftype="analyze75">
446 <composite_data value="Analyze75.hdr" ftype="hdr"/>
447 <composite_data value="Analyze75.img" ftype="img"/>
448 <composite_data value="Analyze75.t2m" ftype="t2m"/>
449 </param>
450 <param name="inputpeptidefile" value="inputpeptides.txt" ftype="txt"/>
451 <param name="plusminusinDalton" value="0.5"/>
452 <param name="filename" value="Testfile_analyze75"/>
453 <output name="plots" file="Testfile_qualitycontrol_analyze75.pdf" compare="sim_size" delta="20000"/>
454 </test>
455 <test>
456 <param name="infile" value="example_continousS042.RData" ftype="rdata"/>
457 <param name="inputpeptidefile" value="inputpeptides.csv" ftype="txt"/>
458 <param name="plusminusinDalton" value="0.1"/>
459 <param name="filename" value="Testfile_rdata"/>
460 <output name="plots" file="Testfile_qualitycontrol_rdata.pdf" compare="sim_size" delta="20000"/>
461 </test>
462 </tests>
463 <help>
464 <![CDATA[
465 Quality control for MALDI imaging mass spectrometry data.
466
467 Input data: 3 types of input data can be used:
468
469 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_
470 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
471 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
472
473 Only for continuous imzML so far.
474
475 The output of this tool contains key values and plots of the imaging data as pdf.
476
477 ]]>
478 </help>
479 <citations>
480 <citation type="doi">10.1093/bioinformatics/btv146</citation>
481 </citations>
482 </tool>