comparison maldi_quant_peakdetection.xml @ 2:17c54820f3be draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit d2f311f7fff24e54c565127c40414de708e31b3c
author galaxyp
date Thu, 25 Oct 2018 07:32:17 -0400
parents eaaa73b043e6
children 36d38d2cf88c
comparison
equal deleted inserted replaced
1:eaaa73b043e6 2:17c54820f3be
1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.1"> 1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.2">
2 <description> 2 <description>
3 Peak detection, binning and filtering for mass-spectrometry imaging data 3 Peak detection, binning and filtering for mass-spectrometry imaging data
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>maldi_macros.xml</import> 6 <import>maldi_macros.xml</import>
29 <configfiles> 29 <configfiles>
30 <configfile name="maldi_quant_peak_detection"><![CDATA[ 30 <configfile name="maldi_quant_peak_detection"><![CDATA[
31 31
32 @R_IMPORTS@ 32 @R_IMPORTS@
33 33
34
35
36 #if $restriction_conditional.restriction == 'restrict': 34 #if $restriction_conditional.restriction == 'restrict':
37 35
38 print('Reading mask region') 36 print('Reading mask region')
39 37
40 ## Import imzML file 38 ## Import imzML file
52 50
53 #if $infile.ext == 'imzml' 51 #if $infile.ext == 'imzml'
54 print('imzML file') 52 print('imzML file')
55 #if str($centroids) == "TRUE" 53 #if str($centroids) == "TRUE"
56 peaks <- importImzMl('infile.imzML', centroided = $centroids) 54 peaks <- importImzMl('infile.imzML', centroided = $centroids)
57 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") 55 pixelnames = paste("xy", coordinates(peaks)[,1],coordinates(peaks)[,2], sep="_")
56 coordinates_info = cbind(coordinates(peaks)[,1:2], c(1:length(peaks)))
58 #else 57 #else
59 maldi_data <- importImzMl('infile.imzML', centroided = $centroids) 58 maldi_data <- importImzMl('infile.imzML', centroided = $centroids)
60 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") 59 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
60 coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data)))
61 #end if 61 #end if
62 coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) 62
63 63
64 #elif $infile.ext == 'tabular' 64 #elif $infile.ext == 'tabular'
65 print('tabular file') 65 print('tabular file')
66 #set $centroids = "TRUE" ## will be used in some if conditions 66 #set $centroids = "TRUE" ## will be used in some if conditions
67 peak_tabular = read.delim("$infile", header = TRUE, stringsAsFactors = FALSE) 67 peak_tabular = read.delim("$infile", header = TRUE, stringsAsFactors = FALSE)
82 load(fileName) 82 load(fileName)
83 get(ls()[ls() != "fileName"]) 83 get(ls()[ls() != "fileName"])
84 } 84 }
85 msidata = loadRData('infile.RData') 85 msidata = loadRData('infile.RData')
86 centroided(msidata) = $centroids 86 centroided(msidata) = $centroids
87 pixelnames = gsub(", y = ", "_", names(Cardinal::pixels(msidata))) 87 ## change to correct pixelnames
88 pixelnames = gsub(" = ", "y_", pixelnames) 88
89 x_coords = unlist(lapply(strsplit(names(Cardinal::pixels(msidata)), ","), `[[`, 1))
90 y_coords = unlist(lapply(strsplit(names(Cardinal::pixels(msidata)), ","), `[[`, 2))
91 x_coordinates = gsub("x = ","",x_coords)
92 y_coordinates = gsub(" y = ","",y_coords)
93 pixelnames = paste0("xy_", x_coordinates, "_", y_coordinates)
89 94
90 cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2]) 95 cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2])
91 96
92 if (centroided(msidata) == FALSE){ 97 if (centroided(msidata) == FALSE){
93 ## create mass spectrum object 98 ## create mass spectrum object
94 cardinal_mzs = Cardinal::mz(msidata) 99 cardinal_mzs = Cardinal::mz(msidata)
95 maldi_data = list() 100 maldi_data = list()
96 for(number_spectra in 1:ncol(msidata)){ 101 for(number_spectra in 1:ncol(msidata)){
97 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra]) 102 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra])
98 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))} 103 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))}
99 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))
100 }else{ 104 }else{
101 peaks = list() 105 peaks = list()
102 for (spectra in 1:ncol(msidata)) 106 for (spectra in 1:ncol(msidata))
103 { 107 {
104 single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata)))) 108 single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata))))
105 peaks[[spectra]] = single_peaks 109 peaks[[spectra]] = single_peaks
106 }} 110 }}
107 #end if 111 #end if
108 #end if 112 #end if
109 113
110
111
112
113
114
115
116
117
118
119 ## default summarized = FALSE 114 ## default summarized = FALSE
120 summarized_spectra = FALSE 115 summarized_spectra = FALSE
121 116
122 ## Quality control plots during peak detection 117 ## Quality control plots during peak detection
123 pdf("peaks_qc_plot.pdf", fonts = "Times", pointsize = 12) 118 pdf("peaks_qc_plot.pdf", fonts = "Times", pointsize = 12)
127 #set $filename = $infile.display_name 122 #set $filename = $infile.display_name
128 123
129 title(main=paste("$filename")) 124 title(main=paste("$filename"))
130 125
131 ## plot input file spectrum: 126 ## plot input file spectrum:
132 #if str($centroids) == "TRUE" 127 #if $centroids:
133 plot(peaks[[1]], main="First spectrum of input file") 128 plot(peaks[[1]], main="First spectrum of input file")
134 #else 129 #else
135 avgSpectra <- averageMassSpectra(maldi_data,method="mean") 130 avgSpectra <- averageMassSpectra(maldi_data,method="mean")
136 plot(avgSpectra, main="Average spectrum of input file") 131 plot(avgSpectra, main="Average spectrum of input file")
137 #end if 132 #end if
138 133
139 134
140
141
142
143
144
145
146
147 ## QC numbers for input file 135 ## QC numbers for input file
148 #if str($centroids) == "TRUE" 136 #if str($centroids) == "TRUE"
149 pixel_number = length(peaks) 137 pixel_number = length(peaks)
150 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 138 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
151 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 139 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
152 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 140 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
153 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 141 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
154 inputdata = c(minmz, maxmz,maxfeatures, medint) 142 number_features = length(unique(unlist(lapply(peaks,mass))))
155 QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medint)) 143 inputdata = c(minmz, maxmz,number_features,mean_features, medint)
144 QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint))
156 vectorofactions = "inputdata" 145 vectorofactions = "inputdata"
157 #else 146 #else
158 pixel_number = length(maldi_data) 147 pixel_number = length(maldi_data)
159 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) 148 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4)
160 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) 149 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4)
161 maxfeatures = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) 150 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2)
162 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) 151 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2)
163 inputdata = c(minmz, maxmz,maxfeatures, medint) 152 number_features = length(unique(unlist(lapply(maldi_data,mass))))
164 QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medint)) 153 inputdata = c(minmz, maxmz,number_features,mean_features, medint)
154 QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint))
165 vectorofactions = "inputdata" 155 vectorofactions = "inputdata"
166 #end if 156 #end if
167 157
168 #if str($tabular_annotation.load_annotation) == 'yes_annotation': 158 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
169 159
178 merged_annotation[is.na(merged_annotation)] = "NA" 168 merged_annotation[is.na(merged_annotation)] = "NA"
179 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] 169 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
180 samples = as.factor(merged_annotation\$annotation) 170 samples = as.factor(merged_annotation\$annotation)
181 171
182 ## print annotation overview into PDF output 172 ## print annotation overview into PDF output
183
184 ## the more annotation groups a file has the smaller will be the legend
185 number_combined = length(levels(as.factor(merged_annotation\$annotation)))
186 if (number_combined<20){
187 legend_size = 10
188 }else if (number_combined>20 && number_combined<40){
189 legend_size = 9
190 }else if (number_combined>40 && number_combined<60){
191 legend_size = 8
192 }else if (number_combined>60 && number_combined<100){
193 legend_size = 7
194 }else{
195 legend_size = 6
196 }
197 173
198 combine_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation))+ 174 combine_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation))+
199 geom_tile() + 175 geom_tile() +
200 coord_fixed()+ 176 coord_fixed()+
201 ggtitle("Spatial orientation of annotated data")+ 177 ggtitle("Spatial orientation of annotated data")+
202 theme_bw()+ 178 theme_bw()+
203 theme(plot.title = element_text(hjust = 0.5))+ 179 theme(plot.title = element_text(hjust = 0.5))+
204 theme(text=element_text(family="ArialMT", face="bold", size=12))+ 180 theme(text=element_text(family="ArialMT", face="bold", size=12))+
205 theme(legend.position="bottom",legend.direction="vertical")+ 181 theme(legend.position="bottom",legend.direction="vertical")+
206 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+ 182 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+
207 guides(fill=guide_legend(ncol=5,byrow=TRUE)) 183 theme(legend.position="bottom",legend.direction="vertical")+
184 guides(fill=guide_legend(ncol=4,byrow=TRUE))
208 185
209 print(combine_plot) 186 print(combine_plot)
210 187
211 #end if 188 #end if
212
213 189
214 #################### Preprocessing methods ##################################### 190 #################### Preprocessing methods #####################################
215 191
216 #for $method in $methods: 192 #for $method in $methods:
217 193
228 204
229 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method", 205 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method",
230 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr) 206 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr)
231 207
232 ## QC plot and numbers 208 ## QC plot and numbers
209 ## plot old spectrum with baseline in blue and picked peaks in green
210 noise = estimateNoise(maldi_data[[1]], method= "$method.methods_conditional.peak_method")
211 plot(maldi_data[[1]], main="First spectrum with noise line (blue) and picked peaks (green)")
212 lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue")
213 points(peaks[[1]], col="green", pch=20)
214 ## plot new spectrum
233 plot(peaks[[1]], main="First spectrum after peak detection") 215 plot(peaks[[1]], main="First spectrum after peak detection")
234 pixel_number = length(peaks) 216 pixel_number = length(peaks)
235 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 217 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
236 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 218 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
237 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 219 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
238 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 220 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
239 peaks_picked = c(minmz, maxmz,maxfeatures, medint) 221 number_features = length(unique(unlist(lapply(peaks,mass))))
222 peaks_picked = c(minmz, maxmz,number_features,mean_features, medint)
240 QC_numbers= cbind(QC_numbers, peaks_picked) 223 QC_numbers= cbind(QC_numbers, peaks_picked)
241 vectorofactions = append(vectorofactions, "peaks_picked") 224 vectorofactions = append(vectorofactions, "peaks_picked")
242 225
243 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 226 if (length(peaks[!sapply(peaks, isEmpty)])>0){
227 featureMatrix <- intensityMatrix(peaks)
244 #if $infile.ext == 'imzml' 228 #if $infile.ext == 'imzml'
245 #if str($centroids) == "FALSE" 229 #if str($centroids) == "FALSE"
246 featureMatrix <- intensityMatrix(peaks, maldi_data) 230 featureMatrix <- intensityMatrix(peaks, maldi_data)
247 #end if 231 #end if
248 #else
249 featureMatrix <- intensityMatrix(peaks)
250 #end if 232 #end if
251 featureMatrix2 =cbind(pixelnames, featureMatrix) 233 featureMatrix2 =cbind(pixelnames, featureMatrix)
252 colnames(featureMatrix2)[1] = c("mz") 234 colnames(featureMatrix2)[1] = c("mz")
253 featureMatrix2 = t(featureMatrix2) 235 featureMatrix2 = t(featureMatrix2)
254 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") 236 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
261 ##monoisotopic peaks 243 ##monoisotopic peaks
262 244
263 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size) 245 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size)
264 246
265 ## QC plot and numbers 247 ## QC plot and numbers
248 ## plot old spectrum with picked isotopes as green dots
249 plot(peaks[[1]], main="First spectrum with picked monoisotopic peaks (green)")
250 points(peaks[[1]], col="green", pch=20)
251 ## plot new spectrum
266 plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection") 252 plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection")
267 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 253 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
268 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 254 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
269 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 255 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
270 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 256 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
271 monoisotopes = c(minmz, maxmz,maxfeatures, medint) 257 number_features = length(unique(unlist(lapply(peaks,mass))))
258 monoisotopes = c(minmz, maxmz,number_features,mean_features, medint)
272 QC_numbers= cbind(QC_numbers, monoisotopes) 259 QC_numbers= cbind(QC_numbers, monoisotopes)
273 vectorofactions = append(vectorofactions, "monoisotopes") 260 vectorofactions = append(vectorofactions, "monoisotopes")
274 261
275 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 262 if (length(peaks[!sapply(peaks, isEmpty)])>0){
263 featureMatrix <- intensityMatrix(peaks)
264 ## only for profile imzML file: featurematrix is overwritten:
276 #if $infile.ext == 'imzml' 265 #if $infile.ext == 'imzml'
277 #if str($centroids) == "FALSE" 266 #if str($centroids) == "FALSE"
278 featureMatrix <- intensityMatrix(peaks, maldi_data) 267 featureMatrix <- intensityMatrix(peaks, maldi_data)
279 #end if 268 #end if
280 #else
281 featureMatrix <- intensityMatrix(peaks)
282 #end if 269 #end if
283 featureMatrix2 =cbind(pixelnames, featureMatrix) 270 featureMatrix2 =cbind(pixelnames, featureMatrix)
284 colnames(featureMatrix2)[1] = c("mz") 271 colnames(featureMatrix2)[1] = c("mz")
285 featureMatrix2 = t(featureMatrix2) 272 featureMatrix2 = t(featureMatrix2)
286 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") 273 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
289 #elif str( $method.methods_conditional.method ) == 'Binning': 276 #elif str( $method.methods_conditional.method ) == 'Binning':
290 277
291 print('binning') 278 print('binning')
292 ##m/z binning 279 ##m/z binning
293 280
294 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance) 281 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method")
295 282
296 ## QC plot and numbers 283 ## QC plot and numbers
297 plot(peaks[[1]], main="First spectrum after binning") 284 plot(peaks[[1]], main="First spectrum after binning")
298 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 285 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
299 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 286 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
300 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 287 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
301 medint =round( median(unlist(lapply(peaks,intensity))), digits=2) 288 medint =round( median(unlist(lapply(peaks,intensity))), digits=2)
302 binned = c(minmz, maxmz,maxfeatures, medint) 289 number_features = length(unique(unlist(lapply(peaks,mass))))
290 binned = c(minmz, maxmz,number_features,mean_features, medint)
303 QC_numbers= cbind(QC_numbers, binned) 291 QC_numbers= cbind(QC_numbers, binned)
304 vectorofactions = append(vectorofactions, "binned") 292 vectorofactions = append(vectorofactions, "binned")
305 293
306 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 294 if (length(peaks[!sapply(peaks, isEmpty)])>0){
295 featureMatrix <- intensityMatrix(peaks)
307 #if $infile.ext == 'imzml' 296 #if $infile.ext == 'imzml'
308 #if str($centroids) == "FALSE" 297 #if str($centroids) == "FALSE"
309 featureMatrix <- intensityMatrix(peaks, maldi_data) 298 featureMatrix <- intensityMatrix(peaks, maldi_data)
299 #elif str($centroids) == "TRUE"
300 featureMatrix <- intensityMatrix(peaks)
310 #end if 301 #end if
311 #if str($centroids) == "TRUE"
312 featureMatrix <- intensityMatrix(peaks)
313 #end if
314 #else
315 featureMatrix <- intensityMatrix(peaks)
316 #end if 302 #end if
317 featureMatrix2 =cbind(pixelnames, featureMatrix) 303 featureMatrix2 =cbind(pixelnames, featureMatrix)
318 colnames(featureMatrix2)[1] = c("mz") 304 colnames(featureMatrix2)[1] = c("mz")
319 featureMatrix2 = t(featureMatrix2) 305 featureMatrix2 = t(featureMatrix2)
320 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") 306 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
344 330
345 ##QC plot and numbers 331 ##QC plot and numbers
346 plot(peaks[[1]], main="First spectrum after m/z filtering") 332 plot(peaks[[1]], main="First spectrum after m/z filtering")
347 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 333 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
348 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 334 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
349 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 335 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
350 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 336 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
351 filtered = c(minmz, maxmz,maxfeatures, medint) 337 number_features = length(unique(unlist(lapply(peaks,mass))))
338 filtered = c(minmz, maxmz,number_features,mean_features, medint)
352 QC_numbers= cbind(QC_numbers, filtered) 339 QC_numbers= cbind(QC_numbers, filtered)
353 vectorofactions = append(vectorofactions, "filtered") 340 vectorofactions = append(vectorofactions, "filtered")
354 341
355 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 342 if (length(peaks[!sapply(peaks, isEmpty)])>0){
343 featureMatrix <- intensityMatrix(peaks)
356 #if $infile.ext == 'imzml' 344 #if $infile.ext == 'imzml'
357 #if str($centroids) == "FALSE" 345 #if str($centroids) == "FALSE"
358 featureMatrix <- intensityMatrix(peaks, maldi_data) 346 featureMatrix <- intensityMatrix(peaks, maldi_data)
359 #end if 347 #end if
360 #else
361 featureMatrix <- intensityMatrix(peaks)
362 #end if 348 #end if
363 featureMatrix2 =cbind(pixelnames, featureMatrix) 349 featureMatrix2 =cbind(pixelnames, featureMatrix)
364 colnames(featureMatrix2)[1] = c("mz") 350 colnames(featureMatrix2)[1] = c("mz")
365 featureMatrix2 = t(featureMatrix2) 351 featureMatrix2 = t(featureMatrix2)
366 }else{print("There are no spectra with peaks left") 352 }else{print("There are no spectra with peaks left")
380 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum") 366 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum")
381 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 367 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
382 }else{print("There are no spectra with peaks left")} 368 }else{print("There are no spectra with peaks left")}
383 369
384 ## print table with QC values 370 ## print table with QC values
385 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\nintensity") 371 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# features", "median\nintensity")
386 plot(0,type='n',axes=FALSE,ann=FALSE) 372 plot(0,type='n',axes=FALSE,ann=FALSE)
387 grid.table(t(QC_numbers)) 373 grid.table(t(QC_numbers))
388 374
389 dev.off() 375 dev.off()
390 376
404 390
405 ]]> 391 ]]>
406 </configfile> 392 </configfile>
407 </configfiles> 393 </configfiles>
408 <inputs> 394 <inputs>
409 <param name="infile" type="data" format="imzml,tabular,rdata" label="Inputfile as imzML or Cardinal MSImageSet saved as RData" help="This file is in imzML or tabular format (peak list, peak detection cannot be run again) or Cardinal MSImageSet saved as RData"/> 395 <param name="infile" type="data" format="imzml,tabular,rdata" label="Inputfile as imzML, peaklist or Cardinal MSImageSet saved as RData" help="imzML or tabular format (peak list) or Cardinal MSImageSet saved as RData"/>
410 <param name="centroids" type="boolean" label="Is the imzML/RData data centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/> 396 <param name="centroids" type="boolean" label="Input data is centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/>
411 <conditional name="restriction_conditional"> 397 <conditional name="restriction_conditional">
412 <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files"> 398 <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files">
413 <option value="no_restriction" selected="True">Calculate on entire file</option> 399 <option value="no_restriction" selected="True">Calculate on entire file</option>
414 <option value="restrict">Restrict to coordinates of interest</option> 400 <option value="restrict">Restrict to coordinates of interest</option>
415 </param> 401 </param>
416 <when value="restrict"> 402 <when value="restrict">
417 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/> 403 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/>
418 <param name="coordinates_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> 404 <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
419 </when> 405 </when>
420 <when value="no_restriction"/> 406 <when value="no_restriction"/>
421 </conditional> 407 </conditional>
422 408
423 <conditional name="tabular_annotation"> 409 <conditional name="tabular_annotation">
424 <param name="load_annotation" type="select" label="Use pixel annotation from tabular file - select in peak detection or filtering step where annotation should be used"> 410 <param name="load_annotation" type="select" label="Pixels have annotations" help="Annotations can be used during peak detection or filtering">
425 <option value="no_annotation" selected="True">pixels belong into one group only</option> 411 <option value="no_annotation" selected="True">pixels have no annotations</option>
426 <option value="yes_annotation">use pixel annotation from a tabular file</option> 412 <option value="yes_annotation">pixel annotation from file</option>
427 </param> 413 </param>
428 <when value="yes_annotation"> 414 <when value="yes_annotation">
429 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file" 415 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file"
430 help="Tabular file with three columns: x values, y values and pixel annotations"/> 416 help="Tabular file with three columns: x values, y values and pixel annotations"/>
431 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/> 417 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
432 <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/> 418 <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
433 <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/> 419 <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/>
434 <param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> 420 <param name="tabular_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
435 </when> 421 </when>
436 <when value="no_annotation"/> 422 <when value="no_annotation"/>
437 </conditional> 423 </conditional>
438 <repeat name="methods" title="Method" min="1"> 424 <repeat name="methods" title="Method" min="1">
439 <conditional name="methods_conditional"> 425 <conditional name="methods_conditional">
447 <param name="peak_method" type="select" label="Noise estimation function"> 433 <param name="peak_method" type="select" label="Noise estimation function">
448 <option value="MAD" selected="True">MAD</option> 434 <option value="MAD" selected="True">MAD</option>
449 <option value="SuperSmoother">SuperSmoother</option> 435 <option value="SuperSmoother">SuperSmoother</option>
450 </param> 436 </param>
451 <param name="halfWindowSize" type="integer" value="20" 437 <param name="halfWindowSize" type="integer" value="20"
452 label="Half window size" 438 label="Half window size (number of data points)"
453 help="The resulting window reaches from 439 help="The resulting window reaches from
454 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] 440 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
455 (window size is 2*halfWindowSize+1). 441 (window size is 2*halfWindowSize+1).
456 The best size differs depending on the selected smoothing method."/> 442 The best size differs depending on the selected smoothing method."/>
457 <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio" help=""/> 443 <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio" help=""/>
458 <param name="use_annotations" type="boolean" label="Generate average mass spectra for each annotation group" help="Spectra with same annotation are summarized, no imzML export possible" truevalue="TRUE" falsevalue="FALSE"/> 444 <param name="use_annotations" type="boolean" label="Detect peaks on average mass spectra" help="Spectra with same annotation are averaged before peak detection, no imzML output" truevalue="TRUE" falsevalue="FALSE"/>
459 </when> 445 </when>
460 <when value="monoisotopic_peaks"> 446 <when value="monoisotopic_peaks">
461 <param name="minCor" type="float" value="0.95" label="minimal correlation" 447 <param name="minCor" type="float" value="0.95" label="Minimal correlation"
462 help="double , minimal correlation between the peak pattern generated by the model and the experimental peaks in the MassPeaks object to be recognized as isotopic pattern"/> 448 help="Minimal correlation between the peak pattern generated by the model and the experimental peaks in the MassPeaks object to be recognized as isotopic pattern"/>
463 <param name="tolerance" type="float" label="tolerance" value="0.0004" 449 <param name="tolerance" type="float" label="Tolerance" value="0.00005"
464 help="double, maximal relative deviation of peaks position (mass) to be considered as isotopic distance"/> 450 help="Maximal relative deviation of a peak position (m/z) to be considered as identical: abs(((mass[i]+distance)-mass[i+1])/mass[i]) smaller than 'tolerance'. For 50ppm use 0.00005 or 50e-6" />
465 <param name="distance" type="float" label="distance" value="1.00235" help="double, distance between two consecutive peaks in an isotopic pattern"/> 451 <param name="distance" type="float" label="Distance" value="1.00235" help="Distance between two consecutive peaks in an isotopic pattern. 1.00235 is average distance for polypeptides."/>
466 <param name="size" type="integer" label="size" value="3" help="double, size (length) of isotopic pattern, longer patterns are prefered over shorter ones"/> 452 <param name="size" type="integer" label="Size" value="3" help="Size (length) of isotopic pattern, longer patterns are preferred over shorter ones, min size is 2."/>
467 </when> 453 </when>
468 <when value="Binning"> 454 <when value="Binning">
469 <param name="bin_tolerance" type="float" value="0.002" label="Peak binning tolerance" 455 <param name="bin_tolerance" type="float" value="0.002" label="Binning tolerance"
470 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/> 456 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/>
457 <param name="bin_method" display="radio" type="select" label="Bin creation rule" help="strict - creates bins never containing two or more peaks of the same sample. relaxed - allows multiple peaks of the same sample in one bin.">
458 <option value="strict" selected="True" >strict</option>
459 <option value="relaxed" >relaxed</option>
460 </param>
471 </when> 461 </when>
472 <when value="Filtering"> 462 <when value="Filtering">
473 <param name="minFrequency" type="float" value="0.25" 463 <param name="minFrequency" type="float" value="0.25"
474 label="Removal of all peaks which occur in less than minFrequency spectra" help="It is a relative threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be sure."/> 464 label="Removal of all peaks which occur in less than minFrequency spectra" help="Relative threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be used."/>
475 <param name="minNumber" type="float" value="1.0" 465 <param name="minNumber" type="float" value="1.0"
476 label="Removal of all peaks which occur in less than minNumber spectra" help="It is an absolute threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be sure."/> 466 label="Removal of all peaks which occur in less than minNumber spectra" help="Absolute threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be used."/>
477 <param name="filter_annot_groups" type="boolean" label="Group wise filtering with pixel annotations. If not specified a single group is assumed or when filtering has been done group wise it will automatically be group wise when selecting filtering on all pixel" truevalue="TRUE" falsevalue="FALSE"/> 467 <param name="filter_annot_groups" type="boolean" label="Group wise filtering with pixel annotations." help="If not specified a single group is assumed or when filtering has been done group wise it will automatically be group wise when selecting filtering on all pixel" truevalue="TRUE" falsevalue="FALSE"/>
478 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE" 468 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE"
479 label="mergeWhitelists" help="if FALSE the filtering criteria are applied groupwise. If TRUE peaks that survive the filtering in one group (level of labels) these peaks are also kept in other groups even if their frequencies are below minFrequency"/> 469 label="mergeWhitelists" help="Yes means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups"/>
480 </when> 470 </when>
481 </conditional> 471 </conditional>
482 </repeat> 472 </repeat>
483 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="true" truevalue="TRUE" falsevalue="FALSE"/> 473 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="True" truevalue="TRUE" falsevalue="FALSE"/>
484 </inputs> 474 </inputs>
485 <outputs> 475 <outputs>
486 <data format="imzml" name="outfile_imzml" label="$infile.display_name peaks"/> 476 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}">
487 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "$infile.display_name peakdetection QC"/> 477 <!--filter>methods_conditional['method'] == 'Peak_detection'</filter-->
488 <data format="tabular" name="masspeaks" label="$infile.display_name mass_peaks"/> 478 </data>
489 <data format="tabular" name="intensity_matrix" label="intensity_matrix"/> 479 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "${tool.name} on ${on_string}: QC"/>
480 <data format="tabular" name="masspeaks" label="${tool.name} on ${on_string}: peaklist"/>
481 <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix"/>
490 </outputs> 482 </outputs>
491 <tests> 483 <tests>
492 <test> 484 <test>
493 <param name="infile" value="" ftype="imzml"> 485 <param name="infile" value="" ftype="imzml">
494 <composite_data value="Example_Continuous.imzML"/> 486 <composite_data value="Example_Continuous.imzML"/>
519 <param name="infile" value="masspeaks3_forinput.tabular"/> 511 <param name="infile" value="masspeaks3_forinput.tabular"/>
520 <param name="centroids" value="TRUE"/> 512 <param name="centroids" value="TRUE"/>
521 <repeat name="methods"> 513 <repeat name="methods">
522 <conditional name="methods_conditional"> 514 <conditional name="methods_conditional">
523 <param name="method" value="monoisotopic_peaks"/> 515 <param name="method" value="monoisotopic_peaks"/>
516 <param name="tolerance" value="0.0004"/>
524 </conditional> 517 </conditional>
525 </repeat> 518 </repeat>
526 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/> 519 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/>
527 <output name="masspeaks" file="masspeaks2.tabular"/> 520 <output name="masspeaks" file="masspeaks2.tabular"/>
528 <output name="intensity_matrix" file="int2.tabular"/> 521 <output name="intensity_matrix" file="int2.tabular"/>
565 </repeat> 558 </repeat>
566 <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/> 559 <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/>
567 <output name="intensity_matrix" file="intensity_matrix3.tabular"/> 560 <output name="intensity_matrix" file="intensity_matrix3.tabular"/>
568 <output name="masspeaks" file="masspeaks3.tabular"/> 561 <output name="masspeaks" file="masspeaks3.tabular"/>
569 </test> 562 </test>
563 <test>
564 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
565 <param name="method" value="Peak_detection"/>
566 <param name="peak_method" value="MAD"/>
567 <param name="halfWindowSize" value="20"/>
568 <param name="snr" value="2"/>
569 <output name="plots" file="peakdetection4_QC.pdf" compare="sim_size"/>
570 <output name="intensity_matrix" file="intensity_matrix4.tabular"/>
571 <output name="masspeaks" file="masspeaks4.tabular"/>
572 </test>
570 </tests> 573 </tests>
571 <help> 574 <help>
572 <![CDATA[ 575 <![CDATA[
573 576
574 @MADLI_QUANT_DESCRIPTION@ 577 @MADLI_QUANT_DESCRIPTION@
603 3 1 606 3 1
604 ... 607 ...
605 ... 608 ...
606 609
607 610
608 - Optional: Tabular file(s) containing pixel coordinates and annotation. X and y values in separate columns and the corresponding annotation in a third column. Tabular files with any header name or no header at all are supported. The annotations can be used to summarize pixels of an imzML file which belong to the same group and detect peaks on average spectra, further steps will be done on average spectra as well and average spectra are exported. If this option was not chosen the filtering tool can use the annotations to filter for peaks within pixel groups (select "Group wise filtering"). 611 - Optional: Tabular file(s) containing pixel coordinates and annotation. X and y values in separate columns and the corresponding annotation in a third column. Tabular files with any header name or no header at all are supported. The annotations can be used to either detect peaks on the average mass spectrum of each annotation group ('Detect peaks on average mass spectra') or to filter for peaks within pixel groups ('Group wise filtering'). This option is not compatible with tabular peak list inputs.
609 612
610 :: 613 ::
611 614
612 x_coord y_coord annotation 615 x_coord y_coord annotation
613 1 1 healthy 616 1 1 healthy
614 2 1 healthy 617 2 1 healthy
615 3 1 disease 618 3 1 disease
616 ... 619 ...
617 ... 620 ...
618 621
619
620 **Options** 622 **Options**
621 623
622 - Peak detection: detection of peaks, only possible with profile mode input 624 - Peak detection: detection of peaks, only possible with profile mode input. A peak is a local maximum above a user defined noise threshold.
625
626 - Two functions exist to estimate the noise: MAD and supersmoother.
627 - SNR is an abbreviation for signal-to-noise-ratio. A local maximum has to be higher than SNR*noise to be recognized as a peak.
628 - 'halfWindowSize': The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]. A local maximum has to be the highest one in the given window to be recognized as peak.
629 - Detect peaks on average mass spectra: Spectra with the same annotation (taken from the annotation tabular input) are averaged and peak picking is performed on the average spectrum of each annotation group. The exported imzML is empty and cannot be used for further analysis steps. The peaklist and intensity matrix outputs contain the annotation group names with their averaged intensity values. Filtering steps have to be done in the same run as the peak picking.
630
631
623 - Monoisotopic peaks: detection of monoisotopic peaks 632 - Monoisotopic peaks: detection of monoisotopic peaks
624 - Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical. 633
625 - Peak filtering: Removal of less frequent peaks (either with a minimum ratio or with an absolute minimum number of spectra in which the peak has to occur) 634
635
636
637 - Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical. The algorithm is based on the following workflow:
638
639 1. Put all mass in a sorted vector.
640 2. Calculate differences between each neighbor.
641 3. Divide the mass vector at the largest gap (largest difference) and form a left and a right bin.
642 4. Rerun step 3 for the left and/or the right bin if they don't fulfill the following criteria:
643
644 - All peaks in a bin are near to the mean (abs(mass-meanMass)/meanMass < tolerance).
645 - method == "strict": The bin doesn't contain two or more peaks of the same sample.
646
647
648 - Peak filtering: Removal of less frequent m/z features:
649
650 - minFrequency : between 0 and 1: m/z has to occur in 0 - 100% of all spectra; minNumber: m/z has to occur in at least this number of spectra --> out of those two criteria the stricter value will be used
651 - Group wise filtering with pixel annotations: 'Yes' means that the filtering criteria are applied for each annotation group separately.
652 - mergeWhitelists: 'Yes' means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups
653 - To filter data that was averaged before peak detection: Filtering has to be done in the same tool run as the peak detection. The filtering criteria are automatically applied per annotation group (Group wise filtering can be 'No') and not per pixel. Example: to keep only m/z that were detected in at least half of all annotation groups set minFrequency to 0.5.
626 654
627 655
628 **Output** 656 **Output**
629 657
630 - centroided imzML file (processed or continuous), except for peak picking on the average of multiple spectra 658 - centroided imzML file (processed or continuous), imzML file is empty when 'Detect peaks on average mass spectra' is chosen.
631 - pdf with mass spectra plots after each preprocessing step 659 - pdf with mass spectra plots after each preprocessing step and a table with key values after each preprocessing step
632 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum" 660 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum"
633 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix . For all other inputs not picked values are set to NA. For peak picking on the average of multiple spectra, each spectra group is a column with mean intensities for each m/z 661 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs not picked values are set to NA. For peak picking on the average of multiple spectra, each spectra group is a column with mean intensities for each m/z.
634 662
635 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/ 663 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/
636 664
637 ]]> 665 ]]>
638 </help> 666 </help>