comparison maldi_quant_peakdetection.xml @ 7:160538a890a6 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit f1e1cd260ef2884d0ba12e2b614df3c72d0934dc
author galaxyp
date Sat, 04 Mar 2023 19:14:04 +0000
parents d286ff4600dd
children
comparison
equal deleted inserted replaced
6:d286ff4600dd 7:160538a890a6
1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.6"> 1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.0">
2 <description> 2 <description>
3 Peak detection, binning and filtering for mass-spectrometry imaging data 3 Peak detection, binning and filtering for mass-spectrometry imaging data
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>maldi_macros.xml</import> 6 <import>maldi_macros.xml</import>
13 cp '${infile.extra_files_path}/ibd' infile.ibd && 13 cp '${infile.extra_files_path}/ibd' infile.ibd &&
14 #elif $infile.ext == 'analyze75' 14 #elif $infile.ext == 'analyze75'
15 cp '${infile.extra_files_path}/hdr' infile.hdr && 15 cp '${infile.extra_files_path}/hdr' infile.hdr &&
16 cp '${infile.extra_files_path}/img' infile.img && 16 cp '${infile.extra_files_path}/img' infile.img &&
17 cp '${infile.extra_files_path}/t2m' infile.t2m && 17 cp '${infile.extra_files_path}/t2m' infile.t2m &&
18 #else
19 ln -s '$infile' infile.RData &&
20 #end if 18 #end if
21 cat '${maldi_quant_peak_detection}' && 19 cat '${maldi_quant_peak_detection}' &&
22 Rscript '${maldi_quant_peak_detection}' && 20 Rscript '${maldi_quant_peak_detection}' &&
23 mkdir $outfile_imzml.files_path && 21 mkdir $outfile_imzml.files_path &&
24 mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && 22 mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
34 32
35 #if $restriction_conditional.restriction == 'restrict': 33 #if $restriction_conditional.restriction == 'restrict':
36 34
37 print('Reading mask region') 35 print('Reading mask region')
38 36
39 ## Import imzML file 37 ## Import imzML file
40 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] 38 coordinate_matrix = read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE)
41 coordinate_matrix = coordinate_matrix[,c($restriction_conditional.column_x, $restriction_conditional.column_y)] 39 coordinate_matrix = coordinate_matrix[,c($restriction_conditional.column_x, $restriction_conditional.column_y)]
42 40 #if str($centroids) == "TRUE"
43 maldi_data <- importImzMl('infile.imzML', 41 peaks <- importImzMl('infile.imzML',
44 coordinates = coordinate_matrix, centroided = $centroids) 42 coordinates = as.matrix(coordinate_matrix), centroided = $centroids)
45 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") 43 pixelnames = paste("xy", coordinates(peaks)[,1],coordinates(peaks)[,2], sep="_")
46 44 #else
45 maldi_data <- importImzMl('infile.imzML',
46 coordinates = as.matrix(coordinate_matrix), centroided = $centroids)
47 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
48 #end if
47 49
48 #else: 50 #else:
49 51
50 print('Reading entire file') 52 print('Reading entire file')
51 ## Import imzML file 53 ## Import imzML file
75 { 77 {
76 single_peaks = createMassPeaks(peak_list[[spectra]]\$mass, peak_list[[spectra]]\$intensity, snr=peak_list[[spectra]]\$snr) 78 single_peaks = createMassPeaks(peak_list[[spectra]]\$mass, peak_list[[spectra]]\$intensity, snr=peak_list[[spectra]]\$snr)
77 peaks[[spectra]] = single_peaks 79 peaks[[spectra]] = single_peaks
78 } 80 }
79 81
80 #else
81 print('rdata file')
82 loadRData <- function(fileName){
83 #loads an RData file, and returns it
84 load(fileName)
85 get(ls()[ls() != "fileName"])
86 }
87 msidata = loadRData('infile.RData')
88 centroided(msidata) = $centroids
89 ## change to correct pixelnames
90
91 x_coords = unlist(lapply(strsplit(names(Cardinal::pixels(msidata)), ","), `[[`, 1))
92 y_coords = unlist(lapply(strsplit(names(Cardinal::pixels(msidata)), ","), `[[`, 2))
93 x_coordinates = gsub("x = ","",x_coords)
94 y_coordinates = gsub(" y = ","",y_coords)
95 pixelnames = paste0("xy_", x_coordinates, "_", y_coordinates)
96
97 cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2])
98
99 if (centroided(msidata) == FALSE){
100 ## create mass spectrum object
101 cardinal_mzs = Cardinal::mz(msidata)
102 maldi_data = list()
103 for(number_spectra in 1:ncol(msidata)){
104 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra])}
105 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))
106 }else{
107 peaks = list()
108 for (spectra in 1:ncol(msidata))
109 {
110 single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata))))
111 peaks[[spectra]] = single_peaks
112 }
113 coordinates_info = cbind(cardinal_coordinates, c(1:length(peaks)))}
114 #end if 82 #end if
115 #end if 83 #end if
116 84
117 ## default summarized = FALSE 85 ## default summarized = FALSE
118 summarized_spectra = FALSE 86 summarized_spectra = FALSE
127 title(main=paste("$filename")) 95 title(main=paste("$filename"))
128 96
129 ## plot input file spectrum: 97 ## plot input file spectrum:
130 #if $centroids: 98 #if $centroids:
131 ## Choose random spectra for QC plots 99 ## Choose random spectra for QC plots
132 print(length(peaks)) 100 random_spectra = sample(1:length(peaks), size=4, replace=length(peaks)<4)## in case dataset has less than 4 spectra, same spectra are allowed to be sampled
133 random_spectra = sample(1:length(peaks), 4, replace=FALSE)
134 random_spectra_name = pixelnames[random_spectra] 101 random_spectra_name = pixelnames[random_spectra]
135 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 102 par(mfrow = c(2, 2), oma=c(0,0,2,0))
136 for (random_sample in random_spectra){ 103 for (random_sample in random_spectra){
137 plot(peaks[[random_sample]],sub="", main=paste0("spectrum ", pixelnames[random_sample]))} 104 plot(peaks[[random_sample]],sub="", main=paste0("spectrum ", pixelnames[random_sample]))}
138 title("Input spectra", outer=TRUE, line=0) 105 title("Input spectra", outer=TRUE, line=0)
139 106
140 #else 107 #else
141 ## Choose random spectra for QC plots 108 ## Choose random spectra for QC plots
142 random_spectra = sample(1:length(maldi_data), 4, replace=FALSE) 109 random_spectra = sample(1:length(maldi_data), size=4, replace=length(maldi_data)<4)## in case dataset has less than 4 spectra, same spectra are allowed to be sampled
143 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 110 par(mfrow = c(2, 2), oma=c(0,0,2,0))
144 for (random_sample in random_spectra){ 111 for (random_sample in random_spectra){
145 plot(maldi_data[[random_sample]],sub="", main=paste0("spectrum ", pixelnames[random_sample])) 112 plot(maldi_data[[random_sample]],sub="", main=paste0("spectrum ", pixelnames[random_sample]))
146 } 113 }
147 title("Input spectra", outer=TRUE, line=0) 114 title("Input spectra", outer=TRUE, line=0)
148 #end if 115 #end if
149
150 116
151 ## QC numbers for input file 117 ## QC numbers for input file
152 #if str($centroids) == "TRUE" 118 #if str($centroids) == "TRUE"
153 pixel_number = length(peaks) 119 pixel_number = length(peaks)
154 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 120 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
155 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 121 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
156 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 122 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
157 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 123 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
158 number_features = length(unique(unlist(lapply(peaks,mass)))) 124 number_features = length(unique(unlist(lapply(peaks,mass))))
159 inputdata = c(minmz, maxmz,number_features,mean_features, medint) 125 number_spectra = length(peaks)
160 QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint)) 126 inputdata = c(minmz, maxmz,number_features,mean_features, medint, number_spectra)
127 QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint, number_spectra))
161 vectorofactions = "inputdata" 128 vectorofactions = "inputdata"
162 #else 129 #else
163 pixel_number = length(maldi_data) 130 pixel_number = length(maldi_data)
164 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) 131 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4)
165 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) 132 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4)
166 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) 133 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2)
167 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) 134 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2)
168 number_features = length(unique(unlist(lapply(maldi_data,mass)))) 135 number_features = length(unique(unlist(lapply(maldi_data,mass))))
169 inputdata = c(minmz, maxmz,number_features,mean_features, medint) 136 number_spectra = length(maldi_data)
170 QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint)) 137 inputdata = c(minmz, maxmz,number_features,mean_features, medint, number_spectra)
138 QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint, number_spectra))
171 vectorofactions = "inputdata" 139 vectorofactions = "inputdata"
172 #end if 140 #end if
173 141
174 #if str($tabular_annotation.load_annotation) == 'yes_annotation': 142 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
175 143
176 ## read and extract x,y,annotation information 144 ## read and extract x,y,annotation information
177 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) 145 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
178 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] 146 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
147 annotation_input[,3] <- as.character(annotation_input[,3]) ## make sure annotations are character
179 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" 148 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation"
180 149
181 ## merge provided annotation with coordinate information of MSI data 150 ## merge provided annotation with coordinate information of MSI data
182 colnames(coordinates_info)[3] = "pixel_index" 151 colnames(coordinates_info)[3] = "pixel_index"
183 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE) 152 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE)
184 merged_annotation[is.na(merged_annotation)] = "NA" 153 merged_annotation[is.na(merged_annotation)] = "NA"
185 ## order coordinate information according to pixel index to make sure that the order stays the same 154 ## order coordinate information according to pixel index to make sure that the order stays the same
186 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] 155 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
187 samples = as.factor(merged_annotation\$annotation) 156 samples = as.factor(merged_annotation\$annotation)
188 157
189 ## print annotation overview into PDF output 158 ## print annotation overview into PDF output
190 159
191 combine_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation))+ 160 combine_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation))+
192 geom_tile() + 161 geom_tile() +
193 coord_fixed()+ 162 coord_fixed()+
194 ggtitle("Spatial orientation of annotated data")+ 163 ggtitle("Spatial orientation of annotated data")+
242 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 211 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
243 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 212 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
244 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 213 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
245 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 214 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
246 number_features = length(unique(unlist(lapply(peaks,mass)))) 215 number_features = length(unique(unlist(lapply(peaks,mass))))
247 peaks_picked = c(minmz, maxmz,number_features,mean_features, medint) 216 number_spectra = length(peaks)
217 peaks_picked = c(minmz, maxmz,number_features,mean_features, medint, number_spectra)
248 QC_numbers= cbind(QC_numbers, peaks_picked) 218 QC_numbers= cbind(QC_numbers, peaks_picked)
249 vectorofactions = append(vectorofactions, "peaks_picked") 219 vectorofactions = append(vectorofactions, "peaks_picked")
250 220
251 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 221 if (length(peaks[!sapply(peaks, isEmpty)])>0){
252 featureMatrix <- intensityMatrix(peaks) 222 featureMatrix <- intensityMatrix(peaks)
291 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 261 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
292 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 262 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
293 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 263 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
294 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 264 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
295 number_features = length(unique(unlist(lapply(peaks,mass)))) 265 number_features = length(unique(unlist(lapply(peaks,mass))))
296 monoisotopes = c(minmz, maxmz,number_features,mean_features, medint) 266 number_spectra = length(peaks)
267 monoisotopes = c(minmz, maxmz,number_features,mean_features, medint, number_spectra)
297 QC_numbers= cbind(QC_numbers, monoisotopes) 268 QC_numbers= cbind(QC_numbers, monoisotopes)
298 vectorofactions = append(vectorofactions, "monoisotopes") 269 vectorofactions = append(vectorofactions, "monoisotopes")
299 270
300 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 271 if (length(peaks[!sapply(peaks, isEmpty)])>0){
301 featureMatrix <- intensityMatrix(peaks) 272 featureMatrix <- intensityMatrix(peaks)
372 ## only if there are empty spectra to remove 343 ## only if there are empty spectra to remove
373 344
374 if (length(findEmptyMassObjects(peaks))>0) 345 if (length(findEmptyMassObjects(peaks))>0)
375 346
376 { 347 {
377 #if $infile.ext == 'rdata'
378 cardinal_coordinates = cardinal_coordinates[-findEmptyMassObjects(peaks),,drop=FALSE] ## remove coordinates of empty spectra for Cardinal RData input
379 #end if
380 348
381 peaks = removeEmptyMassObjects(peaks) 349 peaks = removeEmptyMassObjects(peaks)
382 pixelnames = paste("xy", coordinates(peaks)[,1],coordinates(peaks)[,2], sep="_") 350 pixelnames = paste("xy", coordinates(peaks)[,1],coordinates(peaks)[,2], sep="_")
383 } 351 }
384 #end if 352 #end if
402 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 370 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
403 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 371 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
404 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 372 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
405 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 373 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
406 number_features = length(unique(unlist(lapply(peaks,mass)))) 374 number_features = length(unique(unlist(lapply(peaks,mass))))
407 aligned = c(minmz, maxmz,number_features,mean_features, medint) 375 number_spectra = length(peaks)
376 aligned = c(minmz, maxmz,number_features,mean_features, medint, number_spectra)
408 QC_numbers= cbind(QC_numbers, aligned) 377 QC_numbers= cbind(QC_numbers, aligned)
409 vectorofactions = append(vectorofactions, "aligned") 378 vectorofactions = append(vectorofactions, "aligned")
410 379
411 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 380 if (length(peaks[!sapply(peaks, isEmpty)])>0){
412 featureMatrix <- intensityMatrix(peaks) 381 featureMatrix <- intensityMatrix(peaks)
423 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") 392 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
424 }else{print("There are no spectra with peaks left")} 393 }else{print("There are no spectra with peaks left")}
425 394
426 395
427 #elif str( $method.methods_conditional.method ) == 'Binning': 396 #elif str( $method.methods_conditional.method ) == 'Binning':
397
428 398
429 print('binning') 399 print('binning')
430 ##m/z binning 400 ##m/z binning
431 401
432 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method") 402 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method")
450 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 420 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
451 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 421 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
452 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 422 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
453 medint =round( median(unlist(lapply(peaks,intensity))), digits=2) 423 medint =round( median(unlist(lapply(peaks,intensity))), digits=2)
454 number_features = length(unique(unlist(lapply(peaks,mass)))) 424 number_features = length(unique(unlist(lapply(peaks,mass))))
455 binned = c(minmz, maxmz,number_features,mean_features, medint) 425 number_spectra = length(peaks)
426 binned = c(minmz, maxmz,number_features,mean_features, medint, number_spectra)
456 QC_numbers= cbind(QC_numbers, binned) 427 QC_numbers= cbind(QC_numbers, binned)
457 vectorofactions = append(vectorofactions, "binned") 428 vectorofactions = append(vectorofactions, "binned")
458 429
459 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 430 if (length(peaks[!sapply(peaks, isEmpty)])>0){
460 featureMatrix <- intensityMatrix(peaks) 431 featureMatrix <- intensityMatrix(peaks)
475 #elif str( $method.methods_conditional.method ) == 'Filtering': 446 #elif str( $method.methods_conditional.method ) == 'Filtering':
476 447
477 print('filtering') 448 print('filtering')
478 ##m/z filtering 449 ##m/z filtering
479 450
480 ## filtering on all pixels or on pixel groups: 451
481 #if str($method.methods_conditional.filter_annot_groups ) == 'FALSE': 452 peaks <- filterPeaks(peaks
482 453 #if str( $method.methods_conditional.filter_type.filter_params ) != 'min_Number':
483 peaks <- filterPeaks(peaks, 454 ,minFrequency=$method.methods_conditional.filter_type.minFrequency
484 minFrequency=$method.methods_conditional.minFrequency,
485 minNumber=$method.methods_conditional.minNumber,
486 mergeWhitelists=$method.methods_conditional.mergeWhitelists)
487
488 #elif str( $method.methods_conditional.filter_annot_groups ) == 'TRUE':
489
490 peaks <- filterPeaks(peaks,
491 minFrequency=$method.methods_conditional.minFrequency,
492 minNumber=$method.methods_conditional.minNumber,
493 mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples)
494 #end if 455 #end if
456 #if str( $method.methods_conditional.filter_type.filter_params ) != 'min_Frequency':
457 ,minNumber=$method.methods_conditional.filter_type.minNumber
458 #end if
459 ## in case of group wise filtering set grouping conditions
460 #if str( $method.methods_conditional.filter_groups.filter_annot_groups ) == 'yes_grouping':
461 ,mergeWhitelists=$method.methods_conditional.filter_groups.mergeWhitelists, label = samples
462 #end if
463 ) ## finishes filterPeaks function
464
495 465
496 ##QC plot and numbers 466 ##QC plot and numbers
497 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 467 par(mfrow = c(2, 2), oma=c(0,0,2,0))
498 468
499 for (random_sample in random_spectra){ 469 for (random_sample in random_spectra){
506 plot(NULL, xlim=c(0,0), ylim=c(0,0), ylab="intensity", xlab="m/z") 476 plot(NULL, xlim=c(0,0), ylim=c(0,0), ylab="intensity", xlab="m/z")
507 } 477 }
508 ) 478 )
509 } 479 }
510 480
481 ## QC numbers for filtered file
511 title("Filtered spectra", outer=TRUE, line=0) 482 title("Filtered spectra", outer=TRUE, line=0)
512 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 483 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
513 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 484 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
514 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 485 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
515 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 486 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
516 number_features = length(unique(unlist(lapply(peaks,mass)))) 487 number_features = length(unique(unlist(lapply(peaks,mass))))
517 filtered = c(minmz, maxmz,number_features,mean_features, medint) 488 number_spectra = length(peaks)
489 filtered = c(minmz, maxmz,number_features,mean_features, medint, number_spectra)
518 QC_numbers= cbind(QC_numbers, filtered) 490 QC_numbers= cbind(QC_numbers, filtered)
519 vectorofactions = append(vectorofactions, "filtered") 491 vectorofactions = append(vectorofactions, "filtered")
520 492
521 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 493 if (length(peaks[!sapply(peaks, isEmpty)])>0){
522 featureMatrix <- intensityMatrix(peaks) 494 featureMatrix <- intensityMatrix(peaks)
529 colnames(featureMatrix2)[1] = c("mz") 501 colnames(featureMatrix2)[1] = c("mz")
530 featureMatrix2 = t(featureMatrix2) 502 featureMatrix2 = t(featureMatrix2)
531 }else{print("There are no spectra with peaks left") 503 }else{print("There are no spectra with peaks left")
532 featureMatrix2 = matrix(0, ncol=1, nrow=1)} 504 featureMatrix2 = matrix(0, ncol=1, nrow=1)}
533 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") 505 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
506
507 #elif str( $method.methods_conditional.method ) == 'skip_preprocessing':
508 ##for now as option to filter large files
509
534 #end if 510 #end if
511
535 #end for 512 #end for
536 513
537 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 514 if (length(peaks[!sapply(peaks, isEmpty)])>0){
538 ## mass peaks output 515 ## mass peaks output
539 mass_peaks = data.frame(matrix(,ncol=3, nrow=0)) 516 mass_peaks = data.frame(matrix(,ncol=3, nrow=0))
545 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum") 522 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum")
546 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 523 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
547 }else{print("There are no spectra with peaks left")} 524 }else{print("There are no spectra with peaks left")}
548 525
549 ## print table with QC values 526 ## print table with QC values
550 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# peaks (int.>0)", "median\nintensity") 527 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# peaks (int.>0)", "median\nintensity", "# spectra")
551 plot(0,type='n',axes=FALSE,ann=FALSE) 528 plot(0,type='n',axes=FALSE,ann=FALSE)
552 grid.table(t(QC_numbers)) 529 grid.table(t(QC_numbers))
553 530
554 dev.off() 531 dev.off()
532
555 533
556 if (summarized_spectra == FALSE){ 534 if (summarized_spectra == FALSE){
557 #if $infile.ext == 'imzml' 535 #if $infile.ext == 'imzml'
558 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE) 536 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE)
559 #elif $infile.ext == 'tabular' 537 #elif $infile.ext == 'tabular'
560 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\_")), ncol=3, byrow=TRUE) 538 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\_")), ncol=3, byrow=TRUE)
561 ## extract x and y values and create the coordinate matrix in case tabular was input 539 ## extract x and y values and create the coordinate matrix in case tabular was input
562 peaklist_coordinates = unique(cbind(as.numeric(masspeaks_coordinates[,2]), as.numeric(masspeaks_coordinates[,3]))) 540 peaklist_coordinates = unique(cbind(as.numeric(masspeaks_coordinates[,2]), as.numeric(masspeaks_coordinates[,3])))
563 exportImzMl(peaks, file="out.imzMl", processed=TRUE, coordinates=peaklist_coordinates) 541 exportImzMl(peaks, file="out.imzMl", processed=TRUE, coordinates=peaklist_coordinates)
564 #elif $infile.ext == 'rdata'
565 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE, coordinates=cardinal_coordinates)
566 #end if 542 #end if
567 543
568 } 544 }
569 545
570 ]]> 546 ]]>
571 </configfile> 547 </configfile>
572 </configfiles> 548 </configfiles>
573 <inputs> 549 <inputs>
574 <param name="infile" type="data" format="imzml,tabular,rdata" label="MSI data" help="Input file as imzML (composite upload), tabular peaklist or Cardinal MSImageSet saved as RData (regular upload)"/> 550 <param name="infile" type="data" format="imzml,tabular" label="MSI data" help="Input file as imzML (composite upload) or tabular peaklist (regular upload)"/>
575 <param name="centroids" type="boolean" label="Centroided input" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/> 551 <param name="centroids" type="boolean" label="Centroided input" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/>
576 <conditional name="restriction_conditional"> 552 <conditional name="restriction_conditional">
577 <param name="restriction" type="select" label="Use only spectra of interest" help="This option only works for imzML files"> 553 <param name="restriction" type="select" label="Use only spectra of interest" help="This option only works for imzML files">
578 <option value="no_restriction" selected="True">No, calculate on entire file</option> 554 <option value="no_restriction" selected="True">No, use all spectra</option>
579 <option value="restrict">Yes, restrict to spectra of interest</option> 555 <option value="restrict">Yes, restrict to spectra of interest</option>
580 </param> 556 </param>
581 <when value="restrict"> 557 <when value="restrict">
582 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates"/> 558 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates"/>
583 <param name="column_x" data_ref="coordinates_file" label="Column with x values" type="data_column"/> 559 <param name="column_x" data_ref="coordinates_file" label="Column with x values" type="data_column"/>
608 <option value="Peak_detection">Peak detection</option> 584 <option value="Peak_detection">Peak detection</option>
609 <option value="monoisotopic_peaks">Monoisotopic peaks</option> 585 <option value="monoisotopic_peaks">Monoisotopic peaks</option>
610 <option value="Align">Align Spectra (warping/phase correction)</option> 586 <option value="Align">Align Spectra (warping/phase correction)</option>
611 <option value="Binning">Binning</option> 587 <option value="Binning">Binning</option>
612 <option value="Filtering">Filtering</option> 588 <option value="Filtering">Filtering</option>
589 <option value="skip_preprocessing">No method</option>
613 </param> 590 </param>
614 <when value="Peak_detection"> 591 <when value="Peak_detection">
615 <param name="peak_method" type="select" label="Noise estimation function"> 592 <param name="peak_method" type="select" label="Noise estimation function">
616 <option value="MAD" selected="True">MAD</option> 593 <option value="MAD" selected="True">MAD</option>
617 <option value="SuperSmoother">SuperSmoother</option> 594 <option value="SuperSmoother">SuperSmoother</option>
658 <param name="tolerance" type="float" value="0.00005" 635 <param name="tolerance" type="float" value="0.00005"
659 label="Tolerance = abs(mz1 - mz2)/mz2" 636 label="Tolerance = abs(mz1 - mz2)/mz2"
660 help="Maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 5e-5" /> 637 help="Maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 5e-5" />
661 <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/> 638 <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/>
662 <param name="empty_nomatch" type="boolean" label="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/> 639 <param name="empty_nomatch" type="boolean" label="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/>
663 <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/> 640 <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE"/>
664 641
665 <conditional name="reference_for_alignment"> 642 <conditional name="reference_for_alignment">
666 <param name="align_ref" type="select" label="Reference" help="If given, samples will be aligned to reference, use internal calibrants to perform m/z calibration"> 643 <param name="align_ref" type="select" label="Reference" help="If given, samples will be aligned to reference, use internal calibrants to perform m/z calibration">
667 <option value="no_reference" selected="True">no reference</option> 644 <option value="no_reference" selected="True">no reference</option>
668 <option value="yes_reference">reference from tabular file</option> 645 <option value="yes_reference">reference from tabular file</option>
688 <option value="strict" selected="True" >strict</option> 665 <option value="strict" selected="True" >strict</option>
689 <option value="relaxed" >relaxed</option> 666 <option value="relaxed" >relaxed</option>
690 </param> 667 </param>
691 </when> 668 </when>
692 <when value="Filtering"> 669 <when value="Filtering">
693 <param name="minFrequency" type="float" value="0.25" 670 <conditional name="filter_groups">
694 label="Removal of all peaks which occur in less than minFrequency spectra" help="Relative threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be used."/> 671 <param name="filter_annot_groups" type="select" label="m/z filtering parameters are applied to all spectra or groups of spectra" help="By default a single group is assumed and filtering will be done based on all pixels. To filter groups of spectra, an annotation file has to be specified above">
695 <param name="minNumber" type="float" value="1.0" 672 <option value="no_grouping" selected="True">use single group</option>
696 label="Removal of all peaks which occur in less than minNumber spectra" help="Absolute threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be used."/> 673 <option value="yes_grouping">use spectra groups from annotation file</option>
697 <param name="filter_annot_groups" type="boolean" label="Group wise filtering with pixel annotations." help="If not specified a single group is assumed or when filtering has been done group wise it will automatically be group wise when selecting filtering on all pixel" truevalue="TRUE" falsevalue="FALSE"/> 674 </param>
698 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE" 675 <when value="no_grouping"/>
699 label="mergeWhitelists" help="Yes means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups"/> 676 <when value="yes_grouping">
677 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE"
678 label="mergeWhitelists" help="Yes means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups"/>
679 </when>
680 </conditional>
681 <conditional name="filter_type">
682 <param name="filter_params" type="select" label="m/z filtering parameters" help="minFrequency: Removal of all peaks which occur in less than minFrequency spectra (relative threshold). minNumber: Removal of all peaks which occur in less than minNumber spectra (absolute threshold). If both are set the higher value from relative and absolute threshold is taken.">
683 <option value="min_Frequency" selected="True">minFrequency</option>
684 <option value="min_Number">minNumber</option>
685 <option value="both">both</option>
686 </param>
687 <when value="min_Frequency">
688 <param name="minFrequency" type="float" value="0.25" label="Removal of all peaks which occur in less than minFrequency spectra" />
689 </when>
690 <when value="min_Number">
691 <param name="minNumber" type="float" value="1.0" label="Removal of all peaks which occur in less than minNumber spectra" />
692 </when>
693 <when value="both">
694 <param name="minFrequency" type="float" value="0.25" label="Removal of all peaks which occur in less than minFrequency spectra" />
695 <param name="minNumber" type="float" value="1.0" label="Removal of all peaks which occur in less than minNumber spectra" />
696 </when>
697 </conditional>
700 </when> 698 </when>
699 <when value="skip_preprocessing"/>
701 </conditional> 700 </conditional>
702 </repeat> 701 </repeat>
703 </inputs> 702 </inputs>
704 <outputs> 703 <outputs>
705 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzml"> 704 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzml">
742 <repeat name="methods"> 741 <repeat name="methods">
743 <conditional name="methods_conditional"> 742 <conditional name="methods_conditional">
744 <param name="method" value="monoisotopic_peaks"/> 743 <param name="method" value="monoisotopic_peaks"/>
745 <param name="tolerance" value="0.0004"/> 744 <param name="tolerance" value="0.0004"/>
746 <param name="size" value="3"/> 745 <param name="size" value="3"/>
747 </conditional> 746 </conditional>
748 </repeat> 747 </repeat>
749 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/> 748 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/>
750 <output name="masspeaks" file="masspeaks2.tabular"/> 749 <output name="masspeaks" file="masspeaks2.tabular"/>
751 <output name="intensity_matrix" file="int2.tabular"/> 750 <output name="intensity_matrix" file="int2.tabular"/>
752 <output name="outfile_imzml" ftype="imzml" file="peak_detection2.imzml.txt" lines_diff="4"> 751 <output name="outfile_imzml" ftype="imzml" file="peak_detection2.imzml.txt" lines_diff="4">
753 <extra_files type="file" file="peak_detection2.imzml" name="imzml" lines_diff="6"/> 752 <extra_files type="file" file="peak_detection2.imzml" name="imzml" lines_diff="6"/>
754 <extra_files type="file" file="peak_detection2.ibd" name="ibd" compare="sim_size"/> 753 <extra_files type="file" file="peak_detection2.ibd" name="ibd" compare="sim_size"/>
755 </output> 754 </output>
756 </test> 755 </test>
757 <test> 756 <test>
758 <param name="infile" value="" ftype="imzml"> 757 <param name="infile" value="" ftype="imzml">
759 <composite_data value="Example_Continuous.imzML"/> 758 <composite_data value="Example_Continuous.imzML"/>
760 <composite_data value="Example_Continuous.ibd"/> 759 <composite_data value="Example_Continuous.ibd"/>
761 </param> 760 </param>
762 <conditional name="tabular_annotation"> 761 <conditional name="tabular_annotation">
769 </conditional> 768 </conditional>
770 <repeat name="methods"> 769 <repeat name="methods">
771 <conditional name="methods_conditional"> 770 <conditional name="methods_conditional">
772 <param name="method" value="Peak_detection"/> 771 <param name="method" value="Peak_detection"/>
773 <param name="peak_method" value="MAD"/> 772 <param name="peak_method" value="MAD"/>
774 <param name="halfWindowSize" value="1"/> 773 <param name="halfWindowSize" value="10"/>
775 <param name="snr" value="2"/> 774 <param name="snr" value="2"/>
775 </conditional>
776 </repeat>
777 <repeat name="methods">
778 <conditional name="methods_conditional">
779 <param name="method" value="Filtering"/>
780 <conditional name="filter_groups">
781 <param name="filter_annot_groups" value="yes_grouping"/>
782 <param name="mergeWhitelists" value="FALSE"/>
783 </conditional>
784 <conditional name="filter_type">
785 <param name="filter_params" value="min_Frequency"/>
786 <param name="minFrequency" value="0.25"/>
787 </conditional>
788 </conditional>
789 </repeat>
790 <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/>
791 <output name="intensity_matrix" file="int3.tabular"/>
792 <output name="masspeaks" file="masspeaks3.tabular"/>
793 <output name="outfile_imzml" ftype="imzml" file="peak_detection3.imzml.txt" lines_diff="4">
794 <extra_files type="file" file="peak_detection3.imzml" name="imzml" lines_diff="6"/>
795 <extra_files type="file" file="peak_detection3.ibd" name="ibd" compare="sim_size"/>
796 </output>
797 </test>
798 <test>
799 <param name="infile" value="" ftype="imzml">
800 <composite_data value="Example_Processed.imzML"/>
801 <composite_data value="Example_Processed.ibd"/>
802 </param>
803 <repeat name="methods">
804 <conditional name="methods_conditional">
805 <param name="method" value="Peak_detection"/>
806 <param name="peak_method" value="SuperSmoother"/>
807 <param name="halfWindowSize" value="5"/>
808 <param name="snr" value="3"/>
776 </conditional> 809 </conditional>
777 </repeat> 810 </repeat>
778 <repeat name="methods"> 811 <repeat name="methods">
779 <conditional name="methods_conditional"> 812 <conditional name="methods_conditional">
780 <param name="method" value="Binning"/> 813 <param name="method" value="Binning"/>
781 <param name="bin_tolerance" value="0.01"/> 814 <param name="bin_tolerance" value="0.01"/>
782 </conditional> 815 </conditional>
783 </repeat> 816 </repeat>
784 <repeat name="methods">
785 <conditional name="methods_conditional">
786 <param name="method" value="Filtering"/>
787 <param name="minFrequency" value="0.5"/>
788 <param name="minNumber" value="3"/>
789 <param name="filter_annot_groups" value="TRUE"/>
790 <param name="mergeWhitelists" value="FALSE"/>
791 </conditional>
792 </repeat>
793 <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/>
794 <output name="intensity_matrix" file="intensity_matrix3.tabular"/>
795 <output name="masspeaks" file="masspeaks3.tabular"/>
796 <output name="outfile_imzml" ftype="imzml" file="peak_detection3.imzml.txt" lines_diff="4">
797 <extra_files type="file" file="peak_detection3.imzml" name="imzml" lines_diff="6"/>
798 <extra_files type="file" file="peak_detection3.ibd" name="ibd" compare="sim_size"/>
799 </output>
800 </test>
801 <test>
802 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
803 <param name="method" value="Peak_detection"/>
804 <param name="peak_method" value="MAD"/>
805 <param name="halfWindowSize" value="20"/>
806 <param name="snr" value="2"/>
807 <output name="plots" file="peakdetection4_QC.pdf" compare="sim_size"/> 817 <output name="plots" file="peakdetection4_QC.pdf" compare="sim_size"/>
808 <output name="intensity_matrix" file="intensity_matrix4.tabular"/> 818 <output name="intensity_matrix" file="int4.tabular"/>
809 <output name="masspeaks" file="masspeaks4.tabular"/> 819 <output name="masspeaks" file="masspeaks4.tabular"/>
810 <output name="outfile_imzml" ftype="imzml" file="peak_detection4.imzml.txt" lines_diff="4"> 820 <output name="outfile_imzml" ftype="imzml" file="peak_detection4.imzml.txt" lines_diff="4">
811 <extra_files type="file" file="peak_detection4.imzml" name="imzml" lines_diff="6"/> 821 <extra_files type="file" file="peak_detection4.imzml" name="imzml" lines_diff="6"/>
812 <extra_files type="file" file="peak_detection4.ibd" name="ibd" compare="sim_size"/> 822 <extra_files type="file" file="peak_detection4.ibd" name="ibd" compare="sim_size"/>
813 </output> 823 </output>
814 </test> 824 </test>
825 <test>
826 <param name="infile" value="" ftype="imzml">
827 <composite_data value="preprocessing_results3.imzML"/>
828 <composite_data value="preprocessing_results3.ibd"/>
829 </param>
830 <param name="centroids" value="TRUE"/>
831 <conditional name="restriction_conditional">
832 <param name="restriction" value="restrict"/>
833 <param name="coordinates_file" value="annotations.tabular"/>
834 <param name="column_x" value="1"/>
835 <param name="column_y" value="2"/>
836 <param name="coordinates_header" value="TRUE"/>
837 </conditional>
838 <repeat name="methods">
839 <conditional name="methods_conditional">
840 <param name="method" value="skip_preprocessing"/>
841 </conditional>
842 </repeat>
843 <output name="plots" file="peakdetection5_QC.pdf" compare="sim_size"/>
844 <output name="masspeaks" file="masspeaks5.tabular"/>
845 <output name="outfile_imzml" ftype="imzml" file="peak_detection5.imzml.txt" lines_diff="4">
846 <extra_files type="file" file="peak_detection5.imzml" name="imzml" lines_diff="6"/>
847 <extra_files type="file" file="peak_detection5.ibd" name="ibd" compare="sim_size"/>
848 </output>
849 </test>
815 </tests> 850 </tests>
851
816 <help> 852 <help>
817 <![CDATA[ 853 <![CDATA[
818 854
819 @MADLI_QUANT_DESCRIPTION@ 855 @MADLI_QUANT_DESCRIPTION@
820 856
822 858
823 **Input data** 859 **Input data**
824 860
825 - MSI data: 3 types of input data can be used: 861 - MSI data: 2 types of input data can be used:
826 862
827 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ 863 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/imzml/>`_
828 - Cardinal "MSImageSet" data saved as .RData
829 - MSI data as peak list (tabular file) with the columns named "snr", "mass", "intensity" and "spectrum". The spectrum has to be in the following format: xy_1_1 (for pixel coordinates x1y1). The header must have exactly the four column names. 864 - MSI data as peak list (tabular file) with the columns named "snr", "mass", "intensity" and "spectrum". The spectrum has to be in the following format: xy_1_1 (for pixel coordinates x1y1). The header must have exactly the four column names.
830 865
831 :: 866 ::
832 867
833 snr mass intensity spectrum 868 snr mass intensity spectrum