Mercurial > repos > galaxyp > maldi_quant_peak_detection
comparison maldi_quant_peakdetection.xml @ 3:36d38d2cf88c draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit f127be2141cf22e269c85282d226eb16fe14a9c1
author | galaxyp |
---|---|
date | Fri, 15 Feb 2019 10:26:45 -0500 |
parents | 17c54820f3be |
children | e9300ef37403 |
comparison
equal
deleted
inserted
replaced
2:17c54820f3be | 3:36d38d2cf88c |
---|---|
1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.2"> | 1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.3"> |
2 <description> | 2 <description> |
3 Peak detection, binning and filtering for mass-spectrometry imaging data | 3 Peak detection, binning and filtering for mass-spectrometry imaging data |
4 </description> | 4 </description> |
5 <macros> | 5 <macros> |
6 <import>maldi_macros.xml</import> | 6 <import>maldi_macros.xml</import> |
35 | 35 |
36 print('Reading mask region') | 36 print('Reading mask region') |
37 | 37 |
38 ## Import imzML file | 38 ## Import imzML file |
39 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] | 39 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] |
40 coordinate_matrix = coordinate_matrix[,c($restriction_conditional.column_x, $restriction_conditional.column_y)] | |
40 | 41 |
41 maldi_data <- importImzMl('infile.imzML', | 42 maldi_data <- importImzMl('infile.imzML', |
42 coordinates = coordinate_matrix, centroided = $centroids) | 43 coordinates = coordinate_matrix, centroided = $centroids) |
43 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") | 44 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") |
44 | 45 |
97 if (centroided(msidata) == FALSE){ | 98 if (centroided(msidata) == FALSE){ |
98 ## create mass spectrum object | 99 ## create mass spectrum object |
99 cardinal_mzs = Cardinal::mz(msidata) | 100 cardinal_mzs = Cardinal::mz(msidata) |
100 maldi_data = list() | 101 maldi_data = list() |
101 for(number_spectra in 1:ncol(msidata)){ | 102 for(number_spectra in 1:ncol(msidata)){ |
102 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra]) | 103 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra])} |
103 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))} | 104 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data))) |
104 }else{ | 105 }else{ |
105 peaks = list() | 106 peaks = list() |
106 for (spectra in 1:ncol(msidata)) | 107 for (spectra in 1:ncol(msidata)) |
107 { | 108 { |
108 single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata)))) | 109 single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata)))) |
109 peaks[[spectra]] = single_peaks | 110 peaks[[spectra]] = single_peaks |
110 }} | 111 } |
112 coordinates_info = cbind(cardinal_coordinates, c(1:length(peaks)))} | |
111 #end if | 113 #end if |
112 #end if | 114 #end if |
113 | 115 |
114 ## default summarized = FALSE | 116 ## default summarized = FALSE |
115 summarized_spectra = FALSE | 117 summarized_spectra = FALSE |
123 | 125 |
124 title(main=paste("$filename")) | 126 title(main=paste("$filename")) |
125 | 127 |
126 ## plot input file spectrum: | 128 ## plot input file spectrum: |
127 #if $centroids: | 129 #if $centroids: |
128 plot(peaks[[1]], main="First spectrum of input file") | 130 ## Choose random spectra for QC plots |
131 random_spectra = sample(1:length(peaks), 4, replace=FALSE) | |
132 par(mfrow = c(2, 2), oma=c(0,0,2,0)) | |
133 for (random_sample in random_spectra){ | |
134 plot(peaks[[random_sample]],sub="", main=paste0("spectrum ", random_sample))} | |
135 title("Input spectra", outer=TRUE, line=0) | |
136 | |
129 #else | 137 #else |
130 avgSpectra <- averageMassSpectra(maldi_data,method="mean") | 138 ## Choose random spectra for QC plots |
131 plot(avgSpectra, main="Average spectrum of input file") | 139 random_spectra = sample(1:length(maldi_data), 4, replace=FALSE) |
140 par(mfrow = c(2, 2), oma=c(0,0,2,0)) | |
141 for (random_sample in random_spectra){ | |
142 plot(maldi_data[[random_sample]],sub="", main=paste0("spectrum ", random_sample))} | |
143 title("Input spectra", outer=TRUE, line=0) | |
132 #end if | 144 #end if |
133 | 145 |
134 | 146 |
135 ## QC numbers for input file | 147 ## QC numbers for input file |
136 #if str($centroids) == "TRUE" | 148 #if str($centroids) == "TRUE" |
160 ## read and extract x,y,annotation information | 172 ## read and extract x,y,annotation information |
161 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) | 173 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) |
162 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] | 174 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] |
163 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" | 175 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" |
164 | 176 |
165 ## merge with coordinate information of MSI data | 177 ## merge provided annotation with coordinate information of MSI data |
166 colnames(coordinates_info)[3] = "pixel_index" | 178 colnames(coordinates_info)[3] = "pixel_index" |
167 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE) | 179 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE) |
168 merged_annotation[is.na(merged_annotation)] = "NA" | 180 merged_annotation[is.na(merged_annotation)] = "NA" |
181 ## order coordinate information according to pixel index to make sure that the order stays the same | |
169 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] | 182 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] |
170 samples = as.factor(merged_annotation\$annotation) | 183 samples = as.factor(merged_annotation\$annotation) |
171 | 184 |
172 ## print annotation overview into PDF output | 185 ## print annotation overview into PDF output |
173 | 186 |
197 | 210 |
198 #if $method.methods_conditional.use_annotations: | 211 #if $method.methods_conditional.use_annotations: |
199 maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean") ## use average spectra for peak picking | 212 maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean") ## use average spectra for peak picking |
200 pixelnames = levels(samples) | 213 pixelnames = levels(samples) |
201 summarized_spectra = TRUE | 214 summarized_spectra = TRUE |
202 | 215 random_spectra = sample(1:length(maldi_data), 4, replace=TRUE) |
203 #end if | 216 #end if |
204 | 217 |
205 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method", | 218 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method", |
206 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr) | 219 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr) |
207 | 220 |
208 ## QC plot and numbers | 221 ## QC plot and numbers |
209 ## plot old spectrum with baseline in blue and picked peaks in green | 222 ## plot old spectra with baseline in blue and picked peaks in green |
210 noise = estimateNoise(maldi_data[[1]], method= "$method.methods_conditional.peak_method") | 223 par(mfrow = c(2, 2), oma=c(0,0,2,0)) |
211 plot(maldi_data[[1]], main="First spectrum with noise line (blue) and picked peaks (green)") | 224 for (random_sample in random_spectra){ |
212 lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue") | 225 noise = estimateNoise(maldi_data[[random_sample]], method= "$method.methods_conditional.peak_method") |
213 points(peaks[[1]], col="green", pch=20) | 226 plot(maldi_data[[random_sample]], sub="", main=paste0("spectrum ", random_sample)) |
227 lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue") | |
228 points(peaks[[random_sample]], col="green", pch=20)} | |
229 title("S/N in blue and picked peaks in green", outer=TRUE, line=0) | |
230 | |
214 ## plot new spectrum | 231 ## plot new spectrum |
215 plot(peaks[[1]], main="First spectrum after peak detection") | 232 par(mfrow = c(2, 2), oma=c(0,0,2,0)) |
233 for (random_sample in random_spectra){ | |
234 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))} | |
235 title("Picked peaks", outer=TRUE, line=0) | |
236 | |
216 pixel_number = length(peaks) | 237 pixel_number = length(peaks) |
217 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) | 238 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) |
218 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) | 239 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) |
219 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) | 240 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) |
220 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) | 241 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) |
240 #elif str( $method.methods_conditional.method ) == 'monoisotopic_peaks': | 261 #elif str( $method.methods_conditional.method ) == 'monoisotopic_peaks': |
241 | 262 |
242 print('monoisotopic peaks') | 263 print('monoisotopic peaks') |
243 ##monoisotopic peaks | 264 ##monoisotopic peaks |
244 | 265 |
245 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size) | 266 ## keep peaks to plot them with monoisotopic peaks |
246 | 267 picked_peaks = peaks |
247 ## QC plot and numbers | 268 |
269 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, | |
270 tolerance=$method.methods_conditional.tolerance, | |
271 distance=$method.methods_conditional.distance, | |
272 size=$method.methods_conditional.size) | |
273 | |
248 ## plot old spectrum with picked isotopes as green dots | 274 ## plot old spectrum with picked isotopes as green dots |
249 plot(peaks[[1]], main="First spectrum with picked monoisotopic peaks (green)") | 275 par(mfrow = c(2, 2), oma=c(0,0,2,0)) |
250 points(peaks[[1]], col="green", pch=20) | 276 for (random_sample in random_spectra){ |
251 ## plot new spectrum | 277 plot(picked_peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample)) |
252 plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection") | 278 points(peaks[[random_sample]], col="green", pch=20)} |
279 title(paste0("Monoisotopic peaks in green"), outer=TRUE, line=0) | |
280 | |
281 | |
282 par(mfrow = c(2, 2), oma=c(0,0,2,0)) | |
283 for (random_sample in random_spectra){ | |
284 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))} | |
285 title("Monoisotopic peaks", outer=TRUE, line=0) | |
286 | |
253 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) | 287 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) |
254 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) | 288 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) |
255 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) | 289 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) |
256 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) | 290 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) |
257 number_features = length(unique(unlist(lapply(peaks,mass)))) | 291 number_features = length(unique(unlist(lapply(peaks,mass)))) |
271 colnames(featureMatrix2)[1] = c("mz") | 305 colnames(featureMatrix2)[1] = c("mz") |
272 featureMatrix2 = t(featureMatrix2) | 306 featureMatrix2 = t(featureMatrix2) |
273 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") | 307 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") |
274 }else{print("There are no spectra with peaks left")} | 308 }else{print("There are no spectra with peaks left")} |
275 | 309 |
310 | |
311 | |
312 #elif str( $method.methods_conditional.method ) == 'Align': | |
313 | |
314 print('align') | |
315 ##align spectra with 2 separate functions | |
316 | |
317 | |
318 #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference': | |
319 ## 1) calculate warping: | |
320 warping_function <- determineWarpingFunctions(peaks, | |
321 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method", | |
322 allowNoMatches=$method.methods_conditional.allow_nomatch, minFrequency = $method.methods_conditional.reference_for_alignment.min_frequency) | |
323 ## 2) warp spectra: | |
324 peaks = warpMassPeaks(peaks, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch) | |
325 | |
326 | |
327 #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference': | |
328 | |
329 ## create reference mass_vector from tabular file | |
330 mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = $method.methods_conditional.reference_for_alignment.reference_header, stringsAsFactors = FALSE)[,$method.methods_conditional.reference_for_alignment.mz_column] | |
331 int_vector = rep(1,length(mass_vector)) | |
332 mass_list = createMassPeaks(mass_vector, int_vector) | |
333 | |
334 #if str($method.methods_conditional.reference_for_alignment.separate_alignment) == "FALSE" | |
335 print('default alignment') | |
336 | |
337 ## 1) calculate warping: | |
338 warping_function <- determineWarpingFunctions(peaks, | |
339 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method", | |
340 allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list) | |
341 ## 2) warp spectra: | |
342 peaks = warpMassPeaks(peaks, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch) | |
343 | |
344 #elif str($method.methods_conditional.reference_for_alignment.separate_alignment) == "TRUE" | |
345 print('spectra wise alignment') | |
346 | |
347 peaks_new_list =list() | |
348 | |
349 for (pixelnb in 1:length(peaks)) | |
350 { | |
351 | |
352 ## 1) calculate warping: | |
353 warping_function <- determineWarpingFunctions(peaks[[pixelnb]], | |
354 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method", | |
355 allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list) | |
356 ## 2) warp spectra: | |
357 peaks_new_list = warpMassPeaks(list(peaks[[pixelnb]]), warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch) | |
358 } | |
359 peaks_new = peaks_new_list | |
360 | |
361 #end if | |
362 | |
363 #end if | |
364 | |
365 ## QC plot and numbers | |
366 par(mfrow = c(2, 2), oma=c(0,0,2,0)) | |
367 for (random_sample in random_spectra){ | |
368 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))} | |
369 title("Aligned spectra", outer=TRUE, line=0) | |
370 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) | |
371 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) | |
372 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) | |
373 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) | |
374 number_features = length(unique(unlist(lapply(peaks,mass)))) | |
375 aligned = c(minmz, maxmz,number_features,mean_features, medint) | |
376 QC_numbers= cbind(QC_numbers, aligned) | |
377 vectorofactions = append(vectorofactions, "aligned") | |
378 | |
379 if (length(peaks[!sapply(peaks, isEmpty)])>0){ | |
380 featureMatrix <- intensityMatrix(peaks) | |
381 ## only for profile imzML file: featurematrix is overwritten: | |
382 #if $infile.ext == 'imzml' | |
383 #if str($centroids) == "FALSE" | |
384 featureMatrix <- intensityMatrix(peaks, maldi_data) | |
385 #end if | |
386 #end if | |
387 featureMatrix2 =cbind(pixelnames, featureMatrix) | |
388 colnames(featureMatrix2)[1] = c("mz") | |
389 featureMatrix2 = t(featureMatrix2) | |
390 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") | |
391 }else{print("There are no spectra with peaks left")} | |
392 | |
276 #elif str( $method.methods_conditional.method ) == 'Binning': | 393 #elif str( $method.methods_conditional.method ) == 'Binning': |
277 | 394 |
278 print('binning') | 395 print('binning') |
279 ##m/z binning | 396 ##m/z binning |
280 | 397 |
281 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method") | 398 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method") |
282 | 399 |
283 ## QC plot and numbers | 400 ## QC plot and numbers |
284 plot(peaks[[1]], main="First spectrum after binning") | 401 par(mfrow = c(2, 2), oma=c(0,0,2,0)) |
402 for (random_sample in random_spectra){ | |
403 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))} | |
404 title("Binned spectra", outer=TRUE, line=0) | |
285 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) | 405 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) |
286 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) | 406 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) |
287 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) | 407 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) |
288 medint =round( median(unlist(lapply(peaks,intensity))), digits=2) | 408 medint =round( median(unlist(lapply(peaks,intensity))), digits=2) |
289 number_features = length(unique(unlist(lapply(peaks,mass)))) | 409 number_features = length(unique(unlist(lapply(peaks,mass)))) |
327 minNumber=$method.methods_conditional.minNumber, | 447 minNumber=$method.methods_conditional.minNumber, |
328 mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples) | 448 mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples) |
329 #end if | 449 #end if |
330 | 450 |
331 ##QC plot and numbers | 451 ##QC plot and numbers |
332 plot(peaks[[1]], main="First spectrum after m/z filtering") | 452 par(mfrow = c(2, 2), oma=c(0,0,2,0)) |
453 for (random_sample in random_spectra){ | |
454 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))} | |
455 title("Filtered spectra", outer=TRUE, line=0) | |
333 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) | 456 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) |
334 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) | 457 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) |
335 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) | 458 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) |
336 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) | 459 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) |
337 number_features = length(unique(unlist(lapply(peaks,mass)))) | 460 number_features = length(unique(unlist(lapply(peaks,mass)))) |
366 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum") | 489 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum") |
367 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 490 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
368 }else{print("There are no spectra with peaks left")} | 491 }else{print("There are no spectra with peaks left")} |
369 | 492 |
370 ## print table with QC values | 493 ## print table with QC values |
371 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# features", "median\nintensity") | 494 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# peaks (int.>0)", "median\nintensity") |
372 plot(0,type='n',axes=FALSE,ann=FALSE) | 495 plot(0,type='n',axes=FALSE,ann=FALSE) |
373 grid.table(t(QC_numbers)) | 496 grid.table(t(QC_numbers)) |
374 | 497 |
375 dev.off() | 498 dev.off() |
376 | 499 |
377 if (summarized_spectra == FALSE){ | 500 if (summarized_spectra == FALSE){ |
378 #if $infile.ext == 'imzml' | 501 #if $infile.ext == 'imzml' |
379 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed) | 502 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE) |
380 #elif $infile.ext == 'tabular' | 503 #elif $infile.ext == 'tabular' |
381 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\_")), ncol=3, byrow=TRUE) | 504 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\_")), ncol=3, byrow=TRUE) |
382 ## extract x and y values and create the coordinate matrix in case tabular was input | 505 ## extract x and y values and create the coordinate matrix in case tabular was input |
383 peaklist_coordinates = unique(cbind(as.numeric(masspeaks_coordinates[,2]), as.numeric(masspeaks_coordinates[,3]))) | 506 peaklist_coordinates = unique(cbind(as.numeric(masspeaks_coordinates[,2]), as.numeric(masspeaks_coordinates[,3]))) |
384 exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=peaklist_coordinates) | 507 exportImzMl(peaks, file="out.imzMl", processed=TRUE, coordinates=peaklist_coordinates) |
385 #elif $infile.ext == 'rdata' | 508 #elif $infile.ext == 'rdata' |
386 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates) | 509 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE, coordinates=cardinal_coordinates) |
387 #end if | 510 #end if |
388 | 511 |
389 } | 512 } |
390 | 513 |
391 ]]> | 514 ]]> |
392 </configfile> | 515 </configfile> |
393 </configfiles> | 516 </configfiles> |
394 <inputs> | 517 <inputs> |
395 <param name="infile" type="data" format="imzml,tabular,rdata" label="Inputfile as imzML, peaklist or Cardinal MSImageSet saved as RData" help="imzML or tabular format (peak list) or Cardinal MSImageSet saved as RData"/> | 518 <param name="infile" type="data" format="imzml,tabular,rdata" label="MSI data" help="Input file as imzML (composite upload), tabular peaklist or Cardinal MSImageSet saved as RData (regular upload)"/> |
396 <param name="centroids" type="boolean" label="Input data is centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/> | 519 <param name="centroids" type="boolean" label="Centroided input" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/> |
397 <conditional name="restriction_conditional"> | 520 <conditional name="restriction_conditional"> |
398 <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files"> | 521 <param name="restriction" type="select" label="Use only spectra of interest" help="This option only works for imzML files"> |
399 <option value="no_restriction" selected="True">Calculate on entire file</option> | 522 <option value="no_restriction" selected="True">No, calculate on entire file</option> |
400 <option value="restrict">Restrict to coordinates of interest</option> | 523 <option value="restrict">Yes, restrict to spectra of interest</option> |
401 </param> | 524 </param> |
402 <when value="restrict"> | 525 <when value="restrict"> |
403 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/> | 526 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates"/> |
527 <param name="column_x" data_ref="coordinates_file" label="Column with x values" type="data_column"/> | |
528 <param name="column_y" data_ref="coordinates_file" label="Column with y values" type="data_column"/> | |
404 <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> | 529 <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> |
405 </when> | 530 </when> |
406 <when value="no_restriction"/> | 531 <when value="no_restriction"/> |
407 </conditional> | 532 </conditional> |
408 | 533 |
409 <conditional name="tabular_annotation"> | 534 <conditional name="tabular_annotation"> |
410 <param name="load_annotation" type="select" label="Pixels have annotations" help="Annotations can be used during peak detection or filteringfsplit"> | 535 <param name="load_annotation" type="select" label="Spectra annotations" help="Annotations can be used for group wise peak detection or filtering"> |
411 <option value="no_annotation" selected="True">pixels have no annotations</option> | 536 <option value="no_annotation" selected="True">No</option> |
412 <option value="yes_annotation">pixel annotation from file</option> | 537 <option value="yes_annotation">Yes</option> |
413 </param> | 538 </param> |
414 <when value="yes_annotation"> | 539 <when value="yes_annotation"> |
415 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file" | 540 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file" |
416 help="Tabular file with three columns: x values, y values and pixel annotations"/> | 541 help="Tabular file with three columns: x values, y values and pixel annotations"/> |
417 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/> | 542 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/> |
424 <repeat name="methods" title="Method" min="1"> | 549 <repeat name="methods" title="Method" min="1"> |
425 <conditional name="methods_conditional"> | 550 <conditional name="methods_conditional"> |
426 <param name="method" type="select" label="Select a method"> | 551 <param name="method" type="select" label="Select a method"> |
427 <option value="Peak_detection">Peak detection</option> | 552 <option value="Peak_detection">Peak detection</option> |
428 <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option> | 553 <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option> |
554 <option value="Align">Align Spectra (warping/phase correction)</option> | |
429 <option value="Binning">Binning</option> | 555 <option value="Binning">Binning</option> |
430 <option value="Filtering">Filtering</option> | 556 <option value="Filtering">Filtering</option> |
431 </param> | 557 </param> |
432 <when value="Peak_detection"> | 558 <when value="Peak_detection"> |
433 <param name="peak_method" type="select" label="Noise estimation function"> | 559 <param name="peak_method" type="select" label="Noise estimation function"> |
449 <param name="tolerance" type="float" label="Tolerance" value="0.00005" | 575 <param name="tolerance" type="float" label="Tolerance" value="0.00005" |
450 help="Maximal relative deviation of a peak position (m/z) to be considered as identical: abs(((mass[i]+distance)-mass[i+1])/mass[i]) smaller than 'tolerance'. For 50ppm use 0.00005 or 50e-6" /> | 576 help="Maximal relative deviation of a peak position (m/z) to be considered as identical: abs(((mass[i]+distance)-mass[i+1])/mass[i]) smaller than 'tolerance'. For 50ppm use 0.00005 or 50e-6" /> |
451 <param name="distance" type="float" label="Distance" value="1.00235" help="Distance between two consecutive peaks in an isotopic pattern. 1.00235 is average distance for polypeptides."/> | 577 <param name="distance" type="float" label="Distance" value="1.00235" help="Distance between two consecutive peaks in an isotopic pattern. 1.00235 is average distance for polypeptides."/> |
452 <param name="size" type="integer" label="Size" value="3" help="Size (length) of isotopic pattern, longer patterns are preferred over shorter ones, min size is 2."/> | 578 <param name="size" type="integer" label="Size" value="3" help="Size (length) of isotopic pattern, longer patterns are preferred over shorter ones, min size is 2."/> |
453 </when> | 579 </when> |
580 | |
581 <when value="Align"> | |
582 <param name="warping_method" type="select" label="Warping methods"> | |
583 <option value="lowess" selected="True">Lowess</option> | |
584 <option value="linear">Linear</option> | |
585 <option value="quadratic">Quadratic</option> | |
586 <option value="cubic">Cubic</option> | |
587 </param> | |
588 <param name="tolerance" type="float" value="0.00005" | |
589 label="Tolerance = abs(mz1 - mz2)/mz2" | |
590 help="Maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 5e-5" /> | |
591 <param name="allow_nomatch" type="boolean" label="Don't throw an error when fewer than 2 reference m/z values were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/> | |
592 <param name="empty_nomatch" type="boolean" label="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/> | |
593 <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/> | |
594 | |
595 <conditional name="reference_for_alignment"> | |
596 <param name="align_ref" type="select" label="Reference" help="If given, samples will be aligned to reference, use internal calibrants to perform m/z calibration"> | |
597 <option value="no_reference" selected="True">no reference</option> | |
598 <option value="yes_reference">reference from tabular file</option> | |
599 </param> | |
600 <when value="no_reference"> | |
601 <param name="min_frequency" type="float" value="0.9" label = "minFrequency" help="Removal of all peaks which occur in less than minFrequency spectra to generate the reference m/z"/> | |
602 </when> | |
603 <when value="yes_reference"> | |
604 <param name="reference_file" type="data" format="tabular" | |
605 label="Reference m/z values" | |
606 help="Tabular file"/> | |
607 <param name="mz_column" data_ref="reference_file" label="Column with m/z values" type="data_column"/> | |
608 <param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> | |
609 <param name="separate_alignment" type="boolean" label="Spectrum wise alignment" help="Internal binning is omitted to avoid interaction between spectra" truevalue="TRUE" falsevalue="FALSE"/> | |
610 </when> | |
611 </conditional> | |
612 </when> | |
613 | |
454 <when value="Binning"> | 614 <when value="Binning"> |
455 <param name="bin_tolerance" type="float" value="0.002" label="Binning tolerance" | 615 <param name="bin_tolerance" type="float" value="0.002" label="Tolerance" |
456 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/> | 616 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/> |
457 <param name="bin_method" display="radio" type="select" label="Bin creation rule" help="strict - creates bins never containing two or more peaks of the same sample. relaxed - allows multiple peaks of the same sample in one bin."> | 617 <param name="bin_method" display="radio" type="select" label="Bin creation rule" help="strict - creates bins never containing two or more peaks of the same sample. relaxed - allows multiple peaks of the same sample in one bin."> |
458 <option value="strict" selected="True" >strict</option> | 618 <option value="strict" selected="True" >strict</option> |
459 <option value="relaxed" >relaxed</option> | 619 <option value="relaxed" >relaxed</option> |
460 </param> | 620 </param> |
468 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE" | 628 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE" |
469 label="mergeWhitelists" help="Yes means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups"/> | 629 label="mergeWhitelists" help="Yes means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups"/> |
470 </when> | 630 </when> |
471 </conditional> | 631 </conditional> |
472 </repeat> | 632 </repeat> |
473 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="True" truevalue="TRUE" falsevalue="FALSE"/> | |
474 </inputs> | 633 </inputs> |
475 <outputs> | 634 <outputs> |
476 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}"> | 635 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzml"> |
477 <!--filter>methods_conditional['method'] == 'Peak_detection'</filter--> | 636 <!--filter>methods_conditional['method'] == 'Peak_detection'</filter--> |
478 </data> | 637 </data> |
479 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "${tool.name} on ${on_string}: QC"/> | 638 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "${tool.name} on ${on_string}: QC"/> |
480 <data format="tabular" name="masspeaks" label="${tool.name} on ${on_string}: peaklist"/> | 639 <data format="tabular" name="masspeaks" label="${tool.name} on ${on_string}: peaklist"/> |
481 <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix"/> | 640 <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix"/> |
517 </conditional> | 676 </conditional> |
518 </repeat> | 677 </repeat> |
519 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/> | 678 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/> |
520 <output name="masspeaks" file="masspeaks2.tabular"/> | 679 <output name="masspeaks" file="masspeaks2.tabular"/> |
521 <output name="intensity_matrix" file="int2.tabular"/> | 680 <output name="intensity_matrix" file="int2.tabular"/> |
681 <output name="outfile_imzml" ftype="imzml" file="peak_detection2.imzml.txt" lines_diff="4"> | |
682 <extra_files type="file" file="peak_detection2.imzml" name="imzml" lines_diff="6"/> | |
683 <extra_files type="file" file="peak_detection2.ibd" name="ibd" compare="sim_size"/> | |
684 </output> | |
522 </test> | 685 </test> |
523 <test> | 686 <test> |
524 <param name="infile" value="" ftype="imzml"> | 687 <param name="infile" value="" ftype="imzml"> |
525 <composite_data value="Example_Continuous.imzML"/> | 688 <composite_data value="Example_Continuous.imzML"/> |
526 <composite_data value="Example_Continuous.ibd"/> | 689 <composite_data value="Example_Continuous.ibd"/> |
557 </conditional> | 720 </conditional> |
558 </repeat> | 721 </repeat> |
559 <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/> | 722 <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/> |
560 <output name="intensity_matrix" file="intensity_matrix3.tabular"/> | 723 <output name="intensity_matrix" file="intensity_matrix3.tabular"/> |
561 <output name="masspeaks" file="masspeaks3.tabular"/> | 724 <output name="masspeaks" file="masspeaks3.tabular"/> |
725 <output name="outfile_imzml" ftype="imzml" file="peak_detection3.imzml.txt" lines_diff="4"> | |
726 <extra_files type="file" file="peak_detection3.imzml" name="imzml" lines_diff="6"/> | |
727 <extra_files type="file" file="peak_detection3.ibd" name="ibd" compare="sim_size"/> | |
728 </output> | |
562 </test> | 729 </test> |
563 <test> | 730 <test> |
564 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | 731 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> |
565 <param name="method" value="Peak_detection"/> | 732 <param name="method" value="Peak_detection"/> |
566 <param name="peak_method" value="MAD"/> | 733 <param name="peak_method" value="MAD"/> |
567 <param name="halfWindowSize" value="20"/> | 734 <param name="halfWindowSize" value="20"/> |
568 <param name="snr" value="2"/> | 735 <param name="snr" value="2"/> |
569 <output name="plots" file="peakdetection4_QC.pdf" compare="sim_size"/> | 736 <output name="plots" file="peakdetection4_QC.pdf" compare="sim_size"/> |
570 <output name="intensity_matrix" file="intensity_matrix4.tabular"/> | 737 <output name="intensity_matrix" file="intensity_matrix4.tabular"/> |
571 <output name="masspeaks" file="masspeaks4.tabular"/> | 738 <output name="masspeaks" file="masspeaks4.tabular"/> |
739 <output name="outfile_imzml" ftype="imzml" file="peak_detection4.imzml.txt" lines_diff="4"> | |
740 <extra_files type="file" file="peak_detection4.imzml" name="imzml" lines_diff="6"/> | |
741 <extra_files type="file" file="peak_detection4.ibd" name="ibd" compare="sim_size"/> | |
742 </output> | |
572 </test> | 743 </test> |
573 </tests> | 744 </tests> |
574 <help> | 745 <help> |
575 <![CDATA[ | 746 <![CDATA[ |
576 | 747 |
594 6.80 306.25 0.133 xy_1_1 | 765 6.80 306.25 0.133 xy_1_1 |
595 ... | 766 ... |
596 ... | 767 ... |
597 | 768 |
598 | 769 |
599 - Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. The file has to contain x values in the first column and y values in the second columns. Further columns are allowed. Tabular files with any header name or no header at all are supported. | 770 - Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. Tabular files with any header name or no header at all are supported. |
600 | 771 |
601 :: | 772 :: |
602 | 773 |
603 x_coord y_coord | 774 x_coord y_coord |
604 1 1 | 775 1 1 |
627 - SNR is an abbreviation for signal-to-noise-ratio. A local maximum has to be higher than SNR*noise to be recognized as a peak. | 798 - SNR is an abbreviation for signal-to-noise-ratio. A local maximum has to be higher than SNR*noise to be recognized as a peak. |
628 - 'halfWindowSize': The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]. A local maximum has to be the highest one in the given window to be recognized as peak. | 799 - 'halfWindowSize': The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]. A local maximum has to be the highest one in the given window to be recognized as peak. |
629 - Detect peaks on average mass spectra: Spectra with the same annotation (taken from the annotation tabular input) are averaged and peak picking is performed on the average spectrum of each annotation group. The exported imzML is empty and cannot be used for further analysis steps. The peaklist and intensity matrix outputs contain the annotation group names with their averaged intensity values. Filtering steps have to be done in the same run as the peak picking. | 800 - Detect peaks on average mass spectra: Spectra with the same annotation (taken from the annotation tabular input) are averaged and peak picking is performed on the average spectrum of each annotation group. The exported imzML is empty and cannot be used for further analysis steps. The peaklist and intensity matrix outputs contain the annotation group names with their averaged intensity values. Filtering steps have to be done in the same run as the peak picking. |
630 | 801 |
631 | 802 |
632 - Monoisotopic peaks: detection of monoisotopic peaks | 803 - Monoisotopic peaks: Keeps only the monoisotopic peaks |
633 | 804 |
634 | 805 |
806 - Spectra alignment (warping): alignment for (re)calibration of m/z values. | |
807 | |
808 - without external reference m/z: internal reference is obtained by filtering (default 90%) and binning the peaks to find landmark peaks and their average m/z | |
809 - with external reference m/z: the m/z provided in a tabular file are used as a reference, at least 10 reference values are recommended | |
810 - non-linear warping (parametric time warping plus binning) to match the reference peaks (internal or external) to the peaks with the given tolerance. At least two m/z per spectrum are needed for the alignment. This requirement can be skipped by setting "Don't throw an error when less than 2 reference m/z were found in a spectrum" to yes. If the spectra that could not be aligned should be set to zero, select yes in "If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero". In order to remove such empty spectra, set "Should empty spectra be removed" to yes. | |
635 | 811 |
636 | 812 |
637 - Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical. The algorithm is based on the following workflow: | 813 - Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical. The algorithm is based on the following workflow: |
638 | 814 |
639 1. Put all mass in a sorted vector. | 815 1. Put all m/z in a sorted vector. |
640 2. Calculate differences between each neighbor. | 816 2. Calculate differences between each neighbor. |
641 3. Divide the mass vector at the largest gap (largest difference) and form a left and a right bin. | 817 3. Divide the m/z vector at the largest gap (largest difference) and form a left and a right bin. |
642 4. Rerun step 3 for the left and/or the right bin if they don't fulfill the following criteria: | 818 4. Rerun step 3 for the left and/or the right bin if they don't fulfill the following criteria: |
643 | 819 |
644 - All peaks in a bin are near to the mean (abs(mass-meanMass)/meanMass < tolerance). | 820 - All peaks in a bin are near to the mean (abs(mass-meanMass)/meanMass < tolerance). |
645 - method == "strict": The bin doesn't contain two or more peaks of the same sample. | 821 - method == "strict": The bin doesn't contain two or more peaks of the same sample. |
646 | 822 |
653 - To filter data that was averaged before peak detection: Filtering has to be done in the same tool run as the peak detection. The filtering criteria are automatically applied per annotation group (Group wise filtering can be 'No') and not per pixel. Example: to keep only m/z that were detected in at least half of all annotation groups set minFrequency to 0.5. | 829 - To filter data that was averaged before peak detection: Filtering has to be done in the same tool run as the peak detection. The filtering criteria are automatically applied per annotation group (Group wise filtering can be 'No') and not per pixel. Example: to keep only m/z that were detected in at least half of all annotation groups set minFrequency to 0.5. |
654 | 830 |
655 | 831 |
656 **Output** | 832 **Output** |
657 | 833 |
658 - centroided imzML file (processed or continuous), imzML file is empty when 'Detect peaks on average mass spectra' is chosen. | 834 - centroided, processed imzML file, imzML file is empty when 'Detect peaks on average mass spectra' is chosen. |
659 - pdf with mass spectra plots after each preprocessing step and a table with key values after each preprocessing step | 835 - pdf with mass spectra plots of four random spectra and a table with key values after each preprocessing step |
660 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum" | 836 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum" |
661 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs, values that were not picked are set to NA. For peak picking on the average of multiple spectra, each spectra group is a column with mean intensities for each m/z. | 837 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs, values that were not picked are set to NA. For peak picking on the average of multiple spectra, each spectra group is a column with mean intensities for each m/z. |
662 | 838 |
663 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/ | 839 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/ |
664 | 840 |