comparison maldi_quant_peakdetection.xml @ 3:36d38d2cf88c draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit f127be2141cf22e269c85282d226eb16fe14a9c1
author galaxyp
date Fri, 15 Feb 2019 10:26:45 -0500
parents 17c54820f3be
children e9300ef37403
comparison
equal deleted inserted replaced
2:17c54820f3be 3:36d38d2cf88c
1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.2"> 1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.3">
2 <description> 2 <description>
3 Peak detection, binning and filtering for mass-spectrometry imaging data 3 Peak detection, binning and filtering for mass-spectrometry imaging data
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>maldi_macros.xml</import> 6 <import>maldi_macros.xml</import>
35 35
36 print('Reading mask region') 36 print('Reading mask region')
37 37
38 ## Import imzML file 38 ## Import imzML file
39 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] 39 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2]
40 coordinate_matrix = coordinate_matrix[,c($restriction_conditional.column_x, $restriction_conditional.column_y)]
40 41
41 maldi_data <- importImzMl('infile.imzML', 42 maldi_data <- importImzMl('infile.imzML',
42 coordinates = coordinate_matrix, centroided = $centroids) 43 coordinates = coordinate_matrix, centroided = $centroids)
43 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") 44 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
44 45
97 if (centroided(msidata) == FALSE){ 98 if (centroided(msidata) == FALSE){
98 ## create mass spectrum object 99 ## create mass spectrum object
99 cardinal_mzs = Cardinal::mz(msidata) 100 cardinal_mzs = Cardinal::mz(msidata)
100 maldi_data = list() 101 maldi_data = list()
101 for(number_spectra in 1:ncol(msidata)){ 102 for(number_spectra in 1:ncol(msidata)){
102 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra]) 103 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra])}
103 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))} 104 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))
104 }else{ 105 }else{
105 peaks = list() 106 peaks = list()
106 for (spectra in 1:ncol(msidata)) 107 for (spectra in 1:ncol(msidata))
107 { 108 {
108 single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata)))) 109 single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata))))
109 peaks[[spectra]] = single_peaks 110 peaks[[spectra]] = single_peaks
110 }} 111 }
112 coordinates_info = cbind(cardinal_coordinates, c(1:length(peaks)))}
111 #end if 113 #end if
112 #end if 114 #end if
113 115
114 ## default summarized = FALSE 116 ## default summarized = FALSE
115 summarized_spectra = FALSE 117 summarized_spectra = FALSE
123 125
124 title(main=paste("$filename")) 126 title(main=paste("$filename"))
125 127
126 ## plot input file spectrum: 128 ## plot input file spectrum:
127 #if $centroids: 129 #if $centroids:
128 plot(peaks[[1]], main="First spectrum of input file") 130 ## Choose random spectra for QC plots
131 random_spectra = sample(1:length(peaks), 4, replace=FALSE)
132 par(mfrow = c(2, 2), oma=c(0,0,2,0))
133 for (random_sample in random_spectra){
134 plot(peaks[[random_sample]],sub="", main=paste0("spectrum ", random_sample))}
135 title("Input spectra", outer=TRUE, line=0)
136
129 #else 137 #else
130 avgSpectra <- averageMassSpectra(maldi_data,method="mean") 138 ## Choose random spectra for QC plots
131 plot(avgSpectra, main="Average spectrum of input file") 139 random_spectra = sample(1:length(maldi_data), 4, replace=FALSE)
140 par(mfrow = c(2, 2), oma=c(0,0,2,0))
141 for (random_sample in random_spectra){
142 plot(maldi_data[[random_sample]],sub="", main=paste0("spectrum ", random_sample))}
143 title("Input spectra", outer=TRUE, line=0)
132 #end if 144 #end if
133 145
134 146
135 ## QC numbers for input file 147 ## QC numbers for input file
136 #if str($centroids) == "TRUE" 148 #if str($centroids) == "TRUE"
160 ## read and extract x,y,annotation information 172 ## read and extract x,y,annotation information
161 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) 173 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
162 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] 174 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
163 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" 175 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation"
164 176
165 ## merge with coordinate information of MSI data 177 ## merge provided annotation with coordinate information of MSI data
166 colnames(coordinates_info)[3] = "pixel_index" 178 colnames(coordinates_info)[3] = "pixel_index"
167 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE) 179 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE)
168 merged_annotation[is.na(merged_annotation)] = "NA" 180 merged_annotation[is.na(merged_annotation)] = "NA"
181 ## order coordinate information according to pixel index to make sure that the order stays the same
169 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] 182 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
170 samples = as.factor(merged_annotation\$annotation) 183 samples = as.factor(merged_annotation\$annotation)
171 184
172 ## print annotation overview into PDF output 185 ## print annotation overview into PDF output
173 186
197 210
198 #if $method.methods_conditional.use_annotations: 211 #if $method.methods_conditional.use_annotations:
199 maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean") ## use average spectra for peak picking 212 maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean") ## use average spectra for peak picking
200 pixelnames = levels(samples) 213 pixelnames = levels(samples)
201 summarized_spectra = TRUE 214 summarized_spectra = TRUE
202 215 random_spectra = sample(1:length(maldi_data), 4, replace=TRUE)
203 #end if 216 #end if
204 217
205 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method", 218 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method",
206 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr) 219 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr)
207 220
208 ## QC plot and numbers 221 ## QC plot and numbers
209 ## plot old spectrum with baseline in blue and picked peaks in green 222 ## plot old spectra with baseline in blue and picked peaks in green
210 noise = estimateNoise(maldi_data[[1]], method= "$method.methods_conditional.peak_method") 223 par(mfrow = c(2, 2), oma=c(0,0,2,0))
211 plot(maldi_data[[1]], main="First spectrum with noise line (blue) and picked peaks (green)") 224 for (random_sample in random_spectra){
212 lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue") 225 noise = estimateNoise(maldi_data[[random_sample]], method= "$method.methods_conditional.peak_method")
213 points(peaks[[1]], col="green", pch=20) 226 plot(maldi_data[[random_sample]], sub="", main=paste0("spectrum ", random_sample))
227 lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue")
228 points(peaks[[random_sample]], col="green", pch=20)}
229 title("S/N in blue and picked peaks in green", outer=TRUE, line=0)
230
214 ## plot new spectrum 231 ## plot new spectrum
215 plot(peaks[[1]], main="First spectrum after peak detection") 232 par(mfrow = c(2, 2), oma=c(0,0,2,0))
233 for (random_sample in random_spectra){
234 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))}
235 title("Picked peaks", outer=TRUE, line=0)
236
216 pixel_number = length(peaks) 237 pixel_number = length(peaks)
217 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 238 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
218 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 239 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
219 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 240 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
220 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 241 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
240 #elif str( $method.methods_conditional.method ) == 'monoisotopic_peaks': 261 #elif str( $method.methods_conditional.method ) == 'monoisotopic_peaks':
241 262
242 print('monoisotopic peaks') 263 print('monoisotopic peaks')
243 ##monoisotopic peaks 264 ##monoisotopic peaks
244 265
245 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size) 266 ## keep peaks to plot them with monoisotopic peaks
246 267 picked_peaks = peaks
247 ## QC plot and numbers 268
269 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor,
270 tolerance=$method.methods_conditional.tolerance,
271 distance=$method.methods_conditional.distance,
272 size=$method.methods_conditional.size)
273
248 ## plot old spectrum with picked isotopes as green dots 274 ## plot old spectrum with picked isotopes as green dots
249 plot(peaks[[1]], main="First spectrum with picked monoisotopic peaks (green)") 275 par(mfrow = c(2, 2), oma=c(0,0,2,0))
250 points(peaks[[1]], col="green", pch=20) 276 for (random_sample in random_spectra){
251 ## plot new spectrum 277 plot(picked_peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))
252 plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection") 278 points(peaks[[random_sample]], col="green", pch=20)}
279 title(paste0("Monoisotopic peaks in green"), outer=TRUE, line=0)
280
281
282 par(mfrow = c(2, 2), oma=c(0,0,2,0))
283 for (random_sample in random_spectra){
284 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))}
285 title("Monoisotopic peaks", outer=TRUE, line=0)
286
253 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 287 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
254 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 288 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
255 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 289 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
256 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 290 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
257 number_features = length(unique(unlist(lapply(peaks,mass)))) 291 number_features = length(unique(unlist(lapply(peaks,mass))))
271 colnames(featureMatrix2)[1] = c("mz") 305 colnames(featureMatrix2)[1] = c("mz")
272 featureMatrix2 = t(featureMatrix2) 306 featureMatrix2 = t(featureMatrix2)
273 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") 307 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
274 }else{print("There are no spectra with peaks left")} 308 }else{print("There are no spectra with peaks left")}
275 309
310
311
312 #elif str( $method.methods_conditional.method ) == 'Align':
313
314 print('align')
315 ##align spectra with 2 separate functions
316
317
318 #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference':
319 ## 1) calculate warping:
320 warping_function <- determineWarpingFunctions(peaks,
321 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
322 allowNoMatches=$method.methods_conditional.allow_nomatch, minFrequency = $method.methods_conditional.reference_for_alignment.min_frequency)
323 ## 2) warp spectra:
324 peaks = warpMassPeaks(peaks, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)
325
326
327 #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference':
328
329 ## create reference mass_vector from tabular file
330 mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = $method.methods_conditional.reference_for_alignment.reference_header, stringsAsFactors = FALSE)[,$method.methods_conditional.reference_for_alignment.mz_column]
331 int_vector = rep(1,length(mass_vector))
332 mass_list = createMassPeaks(mass_vector, int_vector)
333
334 #if str($method.methods_conditional.reference_for_alignment.separate_alignment) == "FALSE"
335 print('default alignment')
336
337 ## 1) calculate warping:
338 warping_function <- determineWarpingFunctions(peaks,
339 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
340 allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list)
341 ## 2) warp spectra:
342 peaks = warpMassPeaks(peaks, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)
343
344 #elif str($method.methods_conditional.reference_for_alignment.separate_alignment) == "TRUE"
345 print('spectra wise alignment')
346
347 peaks_new_list =list()
348
349 for (pixelnb in 1:length(peaks))
350 {
351
352 ## 1) calculate warping:
353 warping_function <- determineWarpingFunctions(peaks[[pixelnb]],
354 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
355 allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list)
356 ## 2) warp spectra:
357 peaks_new_list = warpMassPeaks(list(peaks[[pixelnb]]), warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)
358 }
359 peaks_new = peaks_new_list
360
361 #end if
362
363 #end if
364
365 ## QC plot and numbers
366 par(mfrow = c(2, 2), oma=c(0,0,2,0))
367 for (random_sample in random_spectra){
368 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))}
369 title("Aligned spectra", outer=TRUE, line=0)
370 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
371 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
372 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
373 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
374 number_features = length(unique(unlist(lapply(peaks,mass))))
375 aligned = c(minmz, maxmz,number_features,mean_features, medint)
376 QC_numbers= cbind(QC_numbers, aligned)
377 vectorofactions = append(vectorofactions, "aligned")
378
379 if (length(peaks[!sapply(peaks, isEmpty)])>0){
380 featureMatrix <- intensityMatrix(peaks)
381 ## only for profile imzML file: featurematrix is overwritten:
382 #if $infile.ext == 'imzml'
383 #if str($centroids) == "FALSE"
384 featureMatrix <- intensityMatrix(peaks, maldi_data)
385 #end if
386 #end if
387 featureMatrix2 =cbind(pixelnames, featureMatrix)
388 colnames(featureMatrix2)[1] = c("mz")
389 featureMatrix2 = t(featureMatrix2)
390 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
391 }else{print("There are no spectra with peaks left")}
392
276 #elif str( $method.methods_conditional.method ) == 'Binning': 393 #elif str( $method.methods_conditional.method ) == 'Binning':
277 394
278 print('binning') 395 print('binning')
279 ##m/z binning 396 ##m/z binning
280 397
281 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method") 398 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method")
282 399
283 ## QC plot and numbers 400 ## QC plot and numbers
284 plot(peaks[[1]], main="First spectrum after binning") 401 par(mfrow = c(2, 2), oma=c(0,0,2,0))
402 for (random_sample in random_spectra){
403 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))}
404 title("Binned spectra", outer=TRUE, line=0)
285 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 405 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
286 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 406 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
287 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 407 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
288 medint =round( median(unlist(lapply(peaks,intensity))), digits=2) 408 medint =round( median(unlist(lapply(peaks,intensity))), digits=2)
289 number_features = length(unique(unlist(lapply(peaks,mass)))) 409 number_features = length(unique(unlist(lapply(peaks,mass))))
327 minNumber=$method.methods_conditional.minNumber, 447 minNumber=$method.methods_conditional.minNumber,
328 mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples) 448 mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples)
329 #end if 449 #end if
330 450
331 ##QC plot and numbers 451 ##QC plot and numbers
332 plot(peaks[[1]], main="First spectrum after m/z filtering") 452 par(mfrow = c(2, 2), oma=c(0,0,2,0))
453 for (random_sample in random_spectra){
454 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))}
455 title("Filtered spectra", outer=TRUE, line=0)
333 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 456 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
334 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 457 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
335 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 458 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
336 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 459 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
337 number_features = length(unique(unlist(lapply(peaks,mass)))) 460 number_features = length(unique(unlist(lapply(peaks,mass))))
366 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum") 489 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum")
367 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 490 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
368 }else{print("There are no spectra with peaks left")} 491 }else{print("There are no spectra with peaks left")}
369 492
370 ## print table with QC values 493 ## print table with QC values
371 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# features", "median\nintensity") 494 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# peaks (int.>0)", "median\nintensity")
372 plot(0,type='n',axes=FALSE,ann=FALSE) 495 plot(0,type='n',axes=FALSE,ann=FALSE)
373 grid.table(t(QC_numbers)) 496 grid.table(t(QC_numbers))
374 497
375 dev.off() 498 dev.off()
376 499
377 if (summarized_spectra == FALSE){ 500 if (summarized_spectra == FALSE){
378 #if $infile.ext == 'imzml' 501 #if $infile.ext == 'imzml'
379 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed) 502 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE)
380 #elif $infile.ext == 'tabular' 503 #elif $infile.ext == 'tabular'
381 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\_")), ncol=3, byrow=TRUE) 504 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\_")), ncol=3, byrow=TRUE)
382 ## extract x and y values and create the coordinate matrix in case tabular was input 505 ## extract x and y values and create the coordinate matrix in case tabular was input
383 peaklist_coordinates = unique(cbind(as.numeric(masspeaks_coordinates[,2]), as.numeric(masspeaks_coordinates[,3]))) 506 peaklist_coordinates = unique(cbind(as.numeric(masspeaks_coordinates[,2]), as.numeric(masspeaks_coordinates[,3])))
384 exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=peaklist_coordinates) 507 exportImzMl(peaks, file="out.imzMl", processed=TRUE, coordinates=peaklist_coordinates)
385 #elif $infile.ext == 'rdata' 508 #elif $infile.ext == 'rdata'
386 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates) 509 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE, coordinates=cardinal_coordinates)
387 #end if 510 #end if
388 511
389 } 512 }
390 513
391 ]]> 514 ]]>
392 </configfile> 515 </configfile>
393 </configfiles> 516 </configfiles>
394 <inputs> 517 <inputs>
395 <param name="infile" type="data" format="imzml,tabular,rdata" label="Inputfile as imzML, peaklist or Cardinal MSImageSet saved as RData" help="imzML or tabular format (peak list) or Cardinal MSImageSet saved as RData"/> 518 <param name="infile" type="data" format="imzml,tabular,rdata" label="MSI data" help="Input file as imzML (composite upload), tabular peaklist or Cardinal MSImageSet saved as RData (regular upload)"/>
396 <param name="centroids" type="boolean" label="Input data is centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/> 519 <param name="centroids" type="boolean" label="Centroided input" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/>
397 <conditional name="restriction_conditional"> 520 <conditional name="restriction_conditional">
398 <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files"> 521 <param name="restriction" type="select" label="Use only spectra of interest" help="This option only works for imzML files">
399 <option value="no_restriction" selected="True">Calculate on entire file</option> 522 <option value="no_restriction" selected="True">No, calculate on entire file</option>
400 <option value="restrict">Restrict to coordinates of interest</option> 523 <option value="restrict">Yes, restrict to spectra of interest</option>
401 </param> 524 </param>
402 <when value="restrict"> 525 <when value="restrict">
403 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/> 526 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates"/>
527 <param name="column_x" data_ref="coordinates_file" label="Column with x values" type="data_column"/>
528 <param name="column_y" data_ref="coordinates_file" label="Column with y values" type="data_column"/>
404 <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> 529 <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
405 </when> 530 </when>
406 <when value="no_restriction"/> 531 <when value="no_restriction"/>
407 </conditional> 532 </conditional>
408 533
409 <conditional name="tabular_annotation"> 534 <conditional name="tabular_annotation">
410 <param name="load_annotation" type="select" label="Pixels have annotations" help="Annotations can be used during peak detection or filteringfsplit"> 535 <param name="load_annotation" type="select" label="Spectra annotations" help="Annotations can be used for group wise peak detection or filtering">
411 <option value="no_annotation" selected="True">pixels have no annotations</option> 536 <option value="no_annotation" selected="True">No</option>
412 <option value="yes_annotation">pixel annotation from file</option> 537 <option value="yes_annotation">Yes</option>
413 </param> 538 </param>
414 <when value="yes_annotation"> 539 <when value="yes_annotation">
415 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file" 540 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file"
416 help="Tabular file with three columns: x values, y values and pixel annotations"/> 541 help="Tabular file with three columns: x values, y values and pixel annotations"/>
417 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/> 542 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
424 <repeat name="methods" title="Method" min="1"> 549 <repeat name="methods" title="Method" min="1">
425 <conditional name="methods_conditional"> 550 <conditional name="methods_conditional">
426 <param name="method" type="select" label="Select a method"> 551 <param name="method" type="select" label="Select a method">
427 <option value="Peak_detection">Peak detection</option> 552 <option value="Peak_detection">Peak detection</option>
428 <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option> 553 <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option>
554 <option value="Align">Align Spectra (warping/phase correction)</option>
429 <option value="Binning">Binning</option> 555 <option value="Binning">Binning</option>
430 <option value="Filtering">Filtering</option> 556 <option value="Filtering">Filtering</option>
431 </param> 557 </param>
432 <when value="Peak_detection"> 558 <when value="Peak_detection">
433 <param name="peak_method" type="select" label="Noise estimation function"> 559 <param name="peak_method" type="select" label="Noise estimation function">
449 <param name="tolerance" type="float" label="Tolerance" value="0.00005" 575 <param name="tolerance" type="float" label="Tolerance" value="0.00005"
450 help="Maximal relative deviation of a peak position (m/z) to be considered as identical: abs(((mass[i]+distance)-mass[i+1])/mass[i]) smaller than 'tolerance'. For 50ppm use 0.00005 or 50e-6" /> 576 help="Maximal relative deviation of a peak position (m/z) to be considered as identical: abs(((mass[i]+distance)-mass[i+1])/mass[i]) smaller than 'tolerance'. For 50ppm use 0.00005 or 50e-6" />
451 <param name="distance" type="float" label="Distance" value="1.00235" help="Distance between two consecutive peaks in an isotopic pattern. 1.00235 is average distance for polypeptides."/> 577 <param name="distance" type="float" label="Distance" value="1.00235" help="Distance between two consecutive peaks in an isotopic pattern. 1.00235 is average distance for polypeptides."/>
452 <param name="size" type="integer" label="Size" value="3" help="Size (length) of isotopic pattern, longer patterns are preferred over shorter ones, min size is 2."/> 578 <param name="size" type="integer" label="Size" value="3" help="Size (length) of isotopic pattern, longer patterns are preferred over shorter ones, min size is 2."/>
453 </when> 579 </when>
580
581 <when value="Align">
582 <param name="warping_method" type="select" label="Warping methods">
583 <option value="lowess" selected="True">Lowess</option>
584 <option value="linear">Linear</option>
585 <option value="quadratic">Quadratic</option>
586 <option value="cubic">Cubic</option>
587 </param>
588 <param name="tolerance" type="float" value="0.00005"
589 label="Tolerance = abs(mz1 - mz2)/mz2"
590 help="Maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 5e-5" />
591 <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/>
592 <param name="empty_nomatch" type="boolean" label="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/>
593 <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/>
594
595 <conditional name="reference_for_alignment">
596 <param name="align_ref" type="select" label="Reference" help="If given, samples will be aligned to reference, use internal calibrants to perform m/z calibration">
597 <option value="no_reference" selected="True">no reference</option>
598 <option value="yes_reference">reference from tabular file</option>
599 </param>
600 <when value="no_reference">
601 <param name="min_frequency" type="float" value="0.9" label = "minFrequency" help="Removal of all peaks which occur in less than minFrequency spectra to generate the reference m/z"/>
602 </when>
603 <when value="yes_reference">
604 <param name="reference_file" type="data" format="tabular"
605 label="Reference m/z values"
606 help="Tabular file"/>
607 <param name="mz_column" data_ref="reference_file" label="Column with m/z values" type="data_column"/>
608 <param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
609 <param name="separate_alignment" type="boolean" label="Spectrum wise alignment" help="Internal binning is omitted to avoid interaction between spectra" truevalue="TRUE" falsevalue="FALSE"/>
610 </when>
611 </conditional>
612 </when>
613
454 <when value="Binning"> 614 <when value="Binning">
455 <param name="bin_tolerance" type="float" value="0.002" label="Binning tolerance" 615 <param name="bin_tolerance" type="float" value="0.002" label="Tolerance"
456 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/> 616 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/>
457 <param name="bin_method" display="radio" type="select" label="Bin creation rule" help="strict - creates bins never containing two or more peaks of the same sample. relaxed - allows multiple peaks of the same sample in one bin."> 617 <param name="bin_method" display="radio" type="select" label="Bin creation rule" help="strict - creates bins never containing two or more peaks of the same sample. relaxed - allows multiple peaks of the same sample in one bin.">
458 <option value="strict" selected="True" >strict</option> 618 <option value="strict" selected="True" >strict</option>
459 <option value="relaxed" >relaxed</option> 619 <option value="relaxed" >relaxed</option>
460 </param> 620 </param>
468 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE" 628 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE"
469 label="mergeWhitelists" help="Yes means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups"/> 629 label="mergeWhitelists" help="Yes means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups"/>
470 </when> 630 </when>
471 </conditional> 631 </conditional>
472 </repeat> 632 </repeat>
473 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="True" truevalue="TRUE" falsevalue="FALSE"/>
474 </inputs> 633 </inputs>
475 <outputs> 634 <outputs>
476 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}"> 635 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzml">
477 <!--filter>methods_conditional['method'] == 'Peak_detection'</filter--> 636 <!--filter>methods_conditional['method'] == 'Peak_detection'</filter-->
478 </data> 637 </data>
479 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "${tool.name} on ${on_string}: QC"/> 638 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "${tool.name} on ${on_string}: QC"/>
480 <data format="tabular" name="masspeaks" label="${tool.name} on ${on_string}: peaklist"/> 639 <data format="tabular" name="masspeaks" label="${tool.name} on ${on_string}: peaklist"/>
481 <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix"/> 640 <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix"/>
517 </conditional> 676 </conditional>
518 </repeat> 677 </repeat>
519 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/> 678 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/>
520 <output name="masspeaks" file="masspeaks2.tabular"/> 679 <output name="masspeaks" file="masspeaks2.tabular"/>
521 <output name="intensity_matrix" file="int2.tabular"/> 680 <output name="intensity_matrix" file="int2.tabular"/>
681 <output name="outfile_imzml" ftype="imzml" file="peak_detection2.imzml.txt" lines_diff="4">
682 <extra_files type="file" file="peak_detection2.imzml" name="imzml" lines_diff="6"/>
683 <extra_files type="file" file="peak_detection2.ibd" name="ibd" compare="sim_size"/>
684 </output>
522 </test> 685 </test>
523 <test> 686 <test>
524 <param name="infile" value="" ftype="imzml"> 687 <param name="infile" value="" ftype="imzml">
525 <composite_data value="Example_Continuous.imzML"/> 688 <composite_data value="Example_Continuous.imzML"/>
526 <composite_data value="Example_Continuous.ibd"/> 689 <composite_data value="Example_Continuous.ibd"/>
557 </conditional> 720 </conditional>
558 </repeat> 721 </repeat>
559 <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/> 722 <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/>
560 <output name="intensity_matrix" file="intensity_matrix3.tabular"/> 723 <output name="intensity_matrix" file="intensity_matrix3.tabular"/>
561 <output name="masspeaks" file="masspeaks3.tabular"/> 724 <output name="masspeaks" file="masspeaks3.tabular"/>
725 <output name="outfile_imzml" ftype="imzml" file="peak_detection3.imzml.txt" lines_diff="4">
726 <extra_files type="file" file="peak_detection3.imzml" name="imzml" lines_diff="6"/>
727 <extra_files type="file" file="peak_detection3.ibd" name="ibd" compare="sim_size"/>
728 </output>
562 </test> 729 </test>
563 <test> 730 <test>
564 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> 731 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
565 <param name="method" value="Peak_detection"/> 732 <param name="method" value="Peak_detection"/>
566 <param name="peak_method" value="MAD"/> 733 <param name="peak_method" value="MAD"/>
567 <param name="halfWindowSize" value="20"/> 734 <param name="halfWindowSize" value="20"/>
568 <param name="snr" value="2"/> 735 <param name="snr" value="2"/>
569 <output name="plots" file="peakdetection4_QC.pdf" compare="sim_size"/> 736 <output name="plots" file="peakdetection4_QC.pdf" compare="sim_size"/>
570 <output name="intensity_matrix" file="intensity_matrix4.tabular"/> 737 <output name="intensity_matrix" file="intensity_matrix4.tabular"/>
571 <output name="masspeaks" file="masspeaks4.tabular"/> 738 <output name="masspeaks" file="masspeaks4.tabular"/>
739 <output name="outfile_imzml" ftype="imzml" file="peak_detection4.imzml.txt" lines_diff="4">
740 <extra_files type="file" file="peak_detection4.imzml" name="imzml" lines_diff="6"/>
741 <extra_files type="file" file="peak_detection4.ibd" name="ibd" compare="sim_size"/>
742 </output>
572 </test> 743 </test>
573 </tests> 744 </tests>
574 <help> 745 <help>
575 <![CDATA[ 746 <![CDATA[
576 747
594 6.80 306.25 0.133 xy_1_1 765 6.80 306.25 0.133 xy_1_1
595 ... 766 ...
596 ... 767 ...
597 768
598 769
599 - Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. The file has to contain x values in the first column and y values in the second columns. Further columns are allowed. Tabular files with any header name or no header at all are supported. 770 - Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. Tabular files with any header name or no header at all are supported.
600 771
601 :: 772 ::
602 773
603 x_coord y_coord 774 x_coord y_coord
604 1 1 775 1 1
627 - SNR is an abbreviation for signal-to-noise-ratio. A local maximum has to be higher than SNR*noise to be recognized as peak. 798 - SNR is an abbreviation for signal-to-noise-ratio. A local maximum has to be higher than SNR*noise to be recognized as peak.
628 - 'halfWindowSize': The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]. A local maximum has to be the highest one in the given window to be recognized as peak. 799 - 'halfWindowSize': The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]. A local maximum has to be the highest one in the given window to be recognized as peak.
629 - Detect peaks on average mass spectra: Spectra with the same annotation (taken from the annotation tabular input) are averaged and peak picking is performed on the average spectrum of each annotation group. The exported imzML is empty and cannot be used for further analysis steps. The peaklist and intensity matrix outputs contain the annotation group names with their averaged intensity values. Filtering steps have to be done in the same run as the peak picking. 800 - Detect peaks on average mass spectra: Spectra with the same annotation (taken from the annotation tabular input) are averaged and peak picking is performed on the average spectrum of each annotation group. The exported imzML is empty and cannot be used for further analysis steps. The peaklist and intensity matrix outputs contain the annotation group names with their averaged intensity values. Filtering steps have to be done in the same run as the peak picking.
630 801
631 802
632 - Monoisotopic peaks: detection of monoisotopic peaks 803 - Monoisotopic peaks: Keeps only the monoisotopic peaks
633 804
634 805
806 - Spectra alignment (warping): alignment for (re)calibration of m/z values.
807
808 - without external reference m/z: internal reference is obtained by filtering (default 90%) and binning the peaks to find landmark peaks and their average m/z
809 - with external reference m/z: the m/z provided in a tabular file are used as a reference, at least 10 reference values are recommended
810 - non-linear warping (parametric time warping plus binning) to match the reference peaks (internal or external) to the peaks with the given tolerance. At least two m/z per spectrum are needed for the alignment. This requirement can be skipped by setting "Don't throw an error when less than 2 reference m/z were found in a spectrum" to yes. To set the intensities of spectra that could not be aligned to zero, select yes in "If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero". In order to remove such empty spectra set "Should empty spectra be removed" to yes.
635 811
636 812
637 - Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical. The algorithm is based on the following workflow: 813 - Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical. The algorithm is based on the following workflow:
638 814
639 1. Put all mass in a sorted vector. 815 1. Put all m/z in a sorted vector.
640 2. Calculate differences between each neighbor. 816 2. Calculate differences between each neighbor.
641 3. Divide the mass vector at the largest gap (largest difference) and form a left and a right bin. 817 3. Divide the m/z vector at the largest gap (largest difference) and form a left and a right bin.
642 4. Rerun step 3 for the left and/or the right bin if they don't fulfill the following criteria: 818 4. Rerun step 3 for the left and/or the right bin if they don't fulfill the following criteria:
643 819
644 - All peaks in a bin are near to the mean (abs(mass-meanMass)/meanMass < tolerance). 820 - All peaks in a bin are near to the mean (abs(mass-meanMass)/meanMass < tolerance).
645 - method == "strict": The bin doesn't contain two or more peaks of the same sample. 821 - method == "strict": The bin doesn't contain two or more peaks of the same sample.
646 822
653 - To filter data that was averaged before peak detection: Filtering has to be done in the same tool run as the peak detection. The filtering criteria are automatically applied per annotation group (Group wise filtering can be 'No') and not per pixel. Example: to keep only m/z that were detected in at least half of all annotation groups set minFrequency to 0.5. 829 - To filter data that was averaged before peak detection: Filtering has to be done in the same tool run as the peak detection. The filtering criteria are automatically applied per annotation group (Group wise filtering can be 'No') and not per pixel. Example: to keep only m/z that were detected in at least half of all annotation groups set minFrequency to 0.5.
654 830
655 831
656 **Output** 832 **Output**
657 833
658 - centroided imzML file (processed or continuous), imzML file is empty when 'Detect peaks on average mass spectra' is chosen. 834 - centroided, processed imzML file; the imzML file is empty when 'Detect peaks on average mass spectra' is chosen.
659 - pdf with mass spectra plots after each preprocessing step and a table with key values after each preprocessing step 835 - pdf with mass spectra plots of four random spectra and a table with key values after each preprocessing step
660 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum" 836 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum"
661 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs not picked values are set to NA. For peak picking on the average of multiple spectra, each spectra group is a column with mean intensities for each m/z. 837 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs not picked values are set to NA. For peak picking on the average of multiple spectra, each spectra group is a column with mean intensities for each m/z.
662 838
663 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/ 839 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/
664 840