comparison maldi_quant_preprocessing.xml @ 3:71411ac28268 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit f127be2141cf22e269c85282d226eb16fe14a9c1
author galaxyp
date Fri, 15 Feb 2019 10:26:00 -0500
parents e754c2b545a9
children 60ee8c592b13
comparison
equal deleted inserted replaced
2:e754c2b545a9 3:71411ac28268
1 <tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="@VERSION@.2"> 1 <tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="@VERSION@.3">
2 <description> 2 <description>
3 Preprocessing of mass-spectrometry imaging data 3 Preprocessing of mass-spectrometry imaging data
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>maldi_macros.xml</import> 6 <import>maldi_macros.xml</import>
40 40
41 print('Reading mask region') 41 print('Reading mask region')
42 ## Import imzML file 42 ## Import imzML file
43 43
44 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] 44 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2]
45 coordinate_matrix = coordinate_matrix[,c($restriction_conditional.column_x, $restriction_conditional.column_y)]
45 46
46 maldi_data = importImzMl('infile.imzML', 47 maldi_data = importImzMl('infile.imzML',
47 coordinates = coordinate_matrix) 48 coordinates = coordinate_matrix)
48 pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) 49 pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2])
49 50
58 ## Import analyze7.5 file 59 ## Import analyze7.5 file
59 maldi_data = importAnalyze( 'infile.hdr' ) 60 maldi_data = importAnalyze( 'infile.hdr' )
60 coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) 61 coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data)))
61 #else 62 #else
62 loadRData <- function(fileName){ 63 loadRData <- function(fileName){
63 #loads an RData file, and returns it 64 ##loads an RData file, and returns it
64 load(fileName) 65 load(fileName)
65 get(ls()[ls() != "fileName"]) 66 get(ls()[ls() != "fileName"])
66 } 67 }
67 msidata = loadRData('infile.RData') 68 msidata = loadRData('infile.RData')
68 ## save coordinates 69 ## save coordinates
69 cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2]) 70 cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2])
70 ## save mz values 71 ## save mz values
71 cardinal_mzs = Cardinal::mz(msidata) 72 cardinal_mzs = Cardinal::mz(msidata)
72 ## create MALDIquant MassSpectrum object 73 ## create MALDIquant MassSpectrum object, order of pixels in iData is same as in coord(msidata):
73 maldi_data = list() 74 maldi_data = list()
74 for(number_spectra in 1:ncol(msidata)){ 75 for(number_spectra in 1:ncol(msidata)){
75 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra]) 76 maldi_data[[number_spectra]] = MALDIquant::createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra])
76 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))
77 } 77 }
78
79 #end if 78 #end if
80 79
81 #end if 80 #end if
82 81
83 ## Quality control plots during preprocessing 82 ## Quality control plots during preprocessing
87 86
88 ## if no filename is given, name of file in Galaxy history is used 87 ## if no filename is given, name of file in Galaxy history is used
89 #set $filename = $infile.display_name 88 #set $filename = $infile.display_name
90 title(main=paste("$filename")) 89 title(main=paste("$filename"))
91 90
92 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
93 print("use annotation file")
94
95 ## read and extract x,y,annotation information
96 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
97 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
98 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation"
99
100 ## merge with coordinate information (from above) of MSI data
101 colnames(coordinates_info)[3] = "pixel_index"
102 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE)
103 merged_annotation[is.na(merged_annotation)] = "NA"
104 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
105 samples = as.factor(merged_annotation\$annotation)
106
107 ## print annotation overview into PDF output
108
109 ## the more annotation groups a file has the smaller will be the legend
110 number_combined = length(levels(as.factor(merged_annotation\$annotation)))
111 if (number_combined<20){
112 legend_size = 10
113 }else if (number_combined>20 && number_combined<40){
114 legend_size = 9
115 }else if (number_combined>40 && number_combined<60){
116 legend_size = 8
117 }else if (number_combined>60 && number_combined<100){
118 legend_size = 7
119 }else{
120 legend_size = 6
121 }
122
123 combine_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation))+
124 geom_tile() +
125 coord_fixed()+
126 ggtitle("Spatial orientation of annotated data")+
127 theme_bw()+
128 theme(plot.title = element_text(hjust = 0.5))+
129 theme(text=element_text(family="ArialMT", face="bold", size=12))+
130 theme(legend.position="bottom",legend.direction="vertical")+
131 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+
132 guides(fill=guide_legend(ncol=5,byrow=TRUE))
133
134 print(combine_plot)
135
136 #end if
137 91
138 #################### Preprocessing methods ##################################### 92 #################### Preprocessing methods #####################################
139 93
140 ## QC plot on input file 94 ## QC plot on input file
141 avgSpectra = averageMassSpectra(maldi_data,method="mean") 95 avgSpectra = averageMassSpectra(maldi_data,method="mean")
145 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) 99 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4)
146 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) 100 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4)
147 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) 101 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2)
148 number_features = length(unique(unlist(lapply(maldi_data,mass)))) 102 number_features = length(unique(unlist(lapply(maldi_data,mass))))
149 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) 103 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2)
150 inputdata = c(minmz, maxmz,number_features,mean_features, medint) 104 inputdata = c(minmz, maxmz,number_features,mean_features,medint, pixel_number)
151 QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint)) 105 QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint, pixel_number))
152 vectorofactions = "inputdata" 106 vectorofactions = "inputdata"
153 107
154 108
155 #for $method in $methods: 109 #for $method in $methods:
156 110
166 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) 120 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4)
167 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) 121 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4)
168 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) 122 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2)
169 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) 123 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2)
170 number_features = length(unique(unlist(lapply(maldi_data,mass)))) 124 number_features = length(unique(unlist(lapply(maldi_data,mass))))
171 transformed = c(minmz, maxmz,number_features,mean_features, medint) 125 transformed = c(minmz, maxmz,number_features,mean_features,medint,pixel_number)
172 QC_numbers= cbind(QC_numbers, transformed) 126 QC_numbers= cbind(QC_numbers, transformed)
173 vectorofactions = append(vectorofactions, "transformed") 127 vectorofactions = append(vectorofactions, "transformed")
174 128
175 129
176 #elif str( $method.methods_conditional.method ) == 'Smoothing': 130 #elif str( $method.methods_conditional.method ) == 'Smoothing':
201 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) 155 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4)
202 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) 156 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4)
203 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) 157 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2)
204 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) 158 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2)
205 number_features = length(unique(unlist(lapply(maldi_data,mass)))) 159 number_features = length(unique(unlist(lapply(maldi_data,mass))))
206 smoothed = c(minmz, maxmz,number_features,mean_features, medint) 160 smoothed = c(minmz, maxmz,number_features,mean_features,medint,pixel_number)
207 QC_numbers= cbind(QC_numbers, smoothed) 161 QC_numbers= cbind(QC_numbers, smoothed)
208 vectorofactions = append(vectorofactions, "smoothed") 162 vectorofactions = append(vectorofactions, "smoothed")
209 163
210 164
211 #elif str( $method.methods_conditional.method ) == 'Baseline': 165 #elif str( $method.methods_conditional.method ) == 'Baseline':
212 166
213 print('baseline removing') 167 print('baseline removing')
214 ## Remove baseline 168 ## Remove baseline
215 169
170 ## Choose random spectra for QC plots
171 random_spectra = sample(1:length(maldi_data), 4, replace=FALSE)
172
216 #if str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'SNIP': 173 #if str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'SNIP':
217 print('SNIP') 174 print('SNIP')
218 random_spectra = sample(1:length(maldi_data), 4, replace=FALSE) 175
219 par(mfrow = c(2,2)) 176 par(mfrow = c(2,2))
220 for (random_sample in random_spectra){ 177 for (random_sample in random_spectra){
221 maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], 178 maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]],
222 method="SNIP", iterations=$method.methods_conditional.methods_for_baseline.iterations) 179 method="SNIP", iterations=$method.methods_conditional.methods_for_baseline.iterations)
223 plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) 180 plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample))
225 182
226 maldi_data = removeBaseline(maldi_data, 183 maldi_data = removeBaseline(maldi_data,
227 method="SNIP", 184 method="SNIP",
228 iterations=$method.methods_conditional.methods_for_baseline.iterations) 185 iterations=$method.methods_conditional.methods_for_baseline.iterations)
229 186
187
230 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'TopHat': 188 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'TopHat':
231 print('TopHat') 189 print('TopHat')
190
191 par(mfrow = c(2,2))
192 for (random_sample in random_spectra){
193 maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]],
194 method="TopHat", halfWindowSize=$method.methods_conditional.methods_for_baseline.tophat_halfWindowSize)
195 plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample))
196 lines(maldi_data_baseline, col="blue", lwd=2)}
232 197
233 maldi_data = removeBaseline(maldi_data, 198 maldi_data = removeBaseline(maldi_data,
234 method="TopHat", 199 method="TopHat",
235 halfWindowSize=$method.methods_conditional.methods_for_baseline.tophat_halfWindowSize) 200 halfWindowSize=$method.methods_conditional.methods_for_baseline.tophat_halfWindowSize)
236 201
202
237 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'ConvexHull': 203 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'ConvexHull':
238 print('ConvexHull') 204 print('ConvexHull')
239 205
206 par(mfrow = c(2,2))
207 for (random_sample in random_spectra){
208 maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]],
209 method="ConvexHull")
210 plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample))
211 lines(maldi_data_baseline, col="blue", lwd=2)}
212
240 maldi_data = removeBaseline(maldi_data, 213 maldi_data = removeBaseline(maldi_data,
241 method="ConvecHull") 214 method="ConvexHull")
215
242 216
243 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'median': 217 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'median':
244 print('median') 218 print('median')
245 219
220 par(mfrow = c(2,2))
221 for (random_sample in random_spectra){
222 maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]],
223 method="median", halfWindowSize=$method.methods_conditional.methods_for_baseline.median_halfWindowSize)
224 plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample))
225 lines(maldi_data_baseline, col="blue", lwd=2)}
226
246 maldi_data = removeBaseline(maldi_data, 227 maldi_data = removeBaseline(maldi_data,
247 method="TopHat", 228 method="median",
248 halfWindowSize=$method.methods_conditional.methods_for_baseline.median_halfWindowSize) 229 halfWindowSize=$method.methods_conditional.methods_for_baseline.median_halfWindowSize)
249 230
250 #end if 231 #end if
251 232
252 ## QC plot and numbers 233 ## QC plot and numbers
257 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) 238 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4)
258 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) 239 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4)
259 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) 240 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2)
260 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) 241 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2)
261 number_features = length(unique(unlist(lapply(maldi_data,mass)))) 242 number_features = length(unique(unlist(lapply(maldi_data,mass))))
262 baseline_removed = c(minmz, maxmz,number_features,mean_features, medint) 243 baseline_removed = c(minmz, maxmz,number_features,mean_features,medint,pixel_number)
263 QC_numbers= cbind(QC_numbers, baseline_removed) 244 QC_numbers= cbind(QC_numbers, baseline_removed)
264 vectorofactions = append(vectorofactions, "baseline_removed") 245 vectorofactions = append(vectorofactions, "bl_removed")
265 246
266 247
267 #elif str( $method.methods_conditional.method ) == 'Calibrate': 248 #elif str( $method.methods_conditional.method ) == 'Calibrate':
268 249
269 print('calibrate') 250 print('calibrate')
285 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) 266 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4)
286 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) 267 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4)
287 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) 268 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2)
288 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) 269 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2)
289 number_features = length(unique(unlist(lapply(maldi_data,mass)))) 270 number_features = length(unique(unlist(lapply(maldi_data,mass))))
290 intensity_calibrated = c(minmz, maxmz,number_features,mean_features, medint) 271 intensity_calibrated = c(minmz, maxmz,number_features,mean_features,medint,pixel_number)
291 QC_numbers= cbind(QC_numbers, intensity_calibrated) 272 QC_numbers= cbind(QC_numbers, intensity_calibrated)
292 vectorofactions = append(vectorofactions, "intensity_calibrated ") 273 vectorofactions = append(vectorofactions, "calibrated")
293 274
294 275
295 #elif str( $method.methods_conditional.method ) == 'Align': 276 #elif str( $method.methods_conditional.method ) == 'Align':
296 277
297 print('align') 278 print('align')
298 ##align spectra 279 ##align spectra with 3 separate functions
280
281 ## create reference if needed
282
283 ## 1) detect peaks:
284 peaks <- detectPeaks(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize,
285 method="$method.methods_conditional.peak_method", SNR=$method.methods_conditional.snr)
286
299 287
300 #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference': 288 #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference':
301 maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize, 289
302 SNR=$method.methods_conditional.snr, tolerance=$method.methods_conditional.tolerance, 290 ## 2) calculate warping:
303 allowNoMatches =$method.methods_conditional.allow_nomatch, emptyNoMatches = $method.methods_conditional.empty_nomatch, 291 warping_function <- determineWarpingFunctions(peaks,
304 warpingMethod="$method.methods_conditional.warping_method") 292 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
293 allowNoMatches=$method.methods_conditional.allow_nomatch, minFrequency = $method.methods_conditional.reference_for_alignment.min_frequency)
294
295 ## 3) warp spectra:
296 maldi_data = warpMassSpectra(maldi_data, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)
297
305 298
306 #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference': 299 #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference':
300
307 ## create reference mass_vector from tabular file 301 ## create reference mass_vector from tabular file
308 mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = $method.methods_conditional.reference_for_alignment.reference_header, stringsAsFactors = FALSE)[,1] 302 mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = $method.methods_conditional.reference_for_alignment.reference_header, stringsAsFactors = FALSE)[,$method.methods_conditional.reference_for_alignment.mz_column]
309 int_vector = rep(1,length(mass_vector)) 303 int_vector = rep(1,length(mass_vector))
310 mass_list = createMassPeaks(mass_vector, int_vector) 304 mass_list = createMassPeaks(mass_vector, int_vector)
311 305
312 maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize, 306 #if str($method.methods_conditional.reference_for_alignment.separate_alignment) == "FALSE"
313 SNR=$method.methods_conditional.snr, 307 print('default alignment')
314 tolerance=$method.methods_conditional.tolerance, 308
315 warpingMethod="$method.methods_conditional.warping_method", 309 ## 2) calculate warping:
316 reference = mass_list, allowNoMatches =$method.methods_conditional.allow_nomatch, emptyNoMatches = $method.methods_conditional.empty_nomatch) 310 warping_function <- determineWarpingFunctions(peaks,
317 311 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
312 allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list)
313
314 ## 3) warp spectra:
315 maldi_data = warpMassSpectra(maldi_data, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)
316
317 #elif str($method.methods_conditional.reference_for_alignment.separate_alignment) == "TRUE"
318 print('spectra wise alignment')
319
320 maldi_data_new_list =list()
321
322 for (pixelnb in 1:length(peaks))
323 {
324 ## 2) calculate warping:
325 warping_function <- determineWarpingFunctions(peaks[[pixelnb]],
326 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
327 allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list)
328
329 ## 3) warp spectra:
330 maldi_data_new = warpMassSpectra(list(maldi_data[[pixelnb]]), warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)
331 maldi_data_new_list = c(maldi_data_new_list, maldi_data_new)
332
333 }
334 maldi_data = maldi_data_new_list
335 #end if
318 #end if 336 #end if
319 337
338
320 #if $method.methods_conditional.remove_empty: 339 #if $method.methods_conditional.remove_empty:
321 print("remove empty spectra") 340 print(paste(length(findEmptyMassObjects(maldi_data)), " empty spectra were removed", sep=" "))
322 341
323 #if $infile.ext == 'rdata' 342 ## only if there are empty spectra to remove
324 cardinal_coordinates = cardinal_coordinates[-findEmptyMassObjects(maldi_data),] ## remove coordinates of empty spectra for Cardinal RData input 343
325 #end if 344 if (length(findEmptyMassObjects(maldi_data))>0)
326 #if str($tabular_annotation.load_annotation) == 'yes_annotation': 345
327 merged_annotation = merged_annotation[-findEmptyMassObjects(maldi_data),] ## remove coordinate annotations for empty spectra 346 {
328 #end if 347 #if $infile.ext == 'rdata'
329 maldi_data = removeEmptyMassObjects(maldi_data) 348 cardinal_coordinates = cardinal_coordinates[-findEmptyMassObjects(maldi_data),,drop=FALSE] ## remove coordinates of empty spectra for Cardinal RData input
349 #end if
350
351 maldi_data = removeEmptyMassObjects(maldi_data)
352 }
330 #end if 353 #end if
331
332 354
333 ## QC plot 355 ## QC plot
334 356
335 if (length(maldi_data)>0){ 357 if (length(maldi_data)>0){
336 avgSpectra = averageMassSpectra(maldi_data,method="mean") 358 avgSpectra = averageMassSpectra(maldi_data,method="mean")
337 plot(avgSpectra, main="Average spectrum after alignment") 359 plot(avgSpectra, main="Average spectrum after alignment")
338 }else{"All spectra are empty"} 360 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2)
361 }else{print("All spectra are empty")}
339 362
340 pixel_number = length(maldi_data) 363 pixel_number = length(maldi_data)
341 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) 364 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4)
342 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) 365 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4)
343 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) 366 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2)
344 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) 367
345 number_features = length(unique(unlist(lapply(maldi_data,mass)))) 368 number_features = length(unique(unlist(lapply(maldi_data,mass))))
346 spectra_aligned = c(minmz, maxmz,number_features,mean_features, medint) 369 spectra_aligned = c(minmz, maxmz,number_features,mean_features, medint,pixel_number)
347 QC_numbers= cbind(QC_numbers, spectra_aligned) 370 QC_numbers= cbind(QC_numbers, spectra_aligned)
348 vectorofactions = append(vectorofactions, "spectra_aligned") 371 vectorofactions = append(vectorofactions, "aligned")
372
373 #elif str( $method.methods_conditional.method ) == 'skip_preprocessing':
374 ##for now as option to filter large files
375
349 #end if 376 #end if
350 377
351 #end for 378 #end for
352 379
353 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# features", "median\nintensity") 380 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# peaks (int.>0)", "median\nintensity", "pixel\nnumber")
381 colnames(QC_numbers) = vectorofactions
354 plot(0,type='n',axes=FALSE,ann=FALSE) 382 plot(0,type='n',axes=FALSE,ann=FALSE)
355 grid.table(t(QC_numbers)) 383 grid.table(t(QC_numbers))
356 384
357 dev.off() 385 dev.off()
358 386
368 396
369 ]]> 397 ]]>
370 </configfile> 398 </configfile>
371 </configfiles> 399 </configfiles>
372 <inputs> 400 <inputs>
373 <param name="infile" type="data" format="imzml,rdata,analyze75" label="Inputfile as imzML or Cardinal MSImageSet saved as RData" help="This file is in imzML format or Cardinal MSImageSet saved as RData. The file must be in profile mode, not centroided"/> 401 <param name="infile" type="data" format="imzml,rdata,analyze75" label="MSI data" help="Input file as imzML (composite upload), or Cardinal MSImageSet saved as RData (regular upload). The file must be in profile mode, not centroided."/>
374 <conditional name="restriction_conditional"> 402 <conditional name="restriction_conditional">
375 <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files"> 403 <param name="restriction" type="select" label="Use only spectra of interest" help="This option only works for imzML files">
376 <option value="no_restriction" selected="True">Calculate on entire file</option> 404 <option value="no_restriction" selected="True">No, calculate on entire file</option>
377 <option value="restrict">Restrict to coordinates of interest</option> 405 <option value="restrict">Yes, restrict to spectra of interest</option>
378 </param> 406 </param>
379 <when value="restrict"> 407 <when value="restrict">
380 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/> 408 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates"/>
409 <param name="column_x" data_ref="coordinates_file" label="Column with x values" type="data_column"/>
410 <param name="column_y" data_ref="coordinates_file" label="Column with y values" type="data_column"/>
381 <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> 411 <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
382 </when> 412 </when>
383 <when value="no_restriction"/> 413 <when value="no_restriction"/>
384 </conditional>
385 <conditional name="tabular_annotation">
386 <param name="load_annotation" type="select" label="For Cardinal RData only: Use pixel annotation from tabular file to have updated annotation information in case empty spectra will be removed">
387 <option value="no_annotation" selected="True">use no annotation</option>
388 <option value="yes_annotation">use pixel annotation from a tabular file</option>
389 </param>
390 <when value="yes_annotation">
391 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file"
392 help="Tabular file with three columns: x values, y values and pixel annotations"/>
393 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
394 <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
395 <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/>
396 <param name="tabular_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
397 </when>
398 <when value="no_annotation"/>
399 </conditional> 414 </conditional>
400 <repeat name="methods" title="Method" min="1"> 415 <repeat name="methods" title="Method" min="1">
401 <conditional name="methods_conditional"> 416 <conditional name="methods_conditional">
402 <param name="method" type="select" label="Select a method"> 417 <param name="method" type="select" label="Select a method">
403 <option value="Transformation" selected="True">Transformation</option> 418 <option value="Transformation" selected="True">Transformation</option>
404 <option value="Smoothing">Smoothing</option> 419 <option value="Smoothing">Smoothing</option>
405 <option value="Baseline">Baseline removal</option> 420 <option value="Baseline">Baseline removal</option>
406 <option value="Calibrate">Calibrate</option> 421 <option value="Calibrate">Intensity calibration (normalization)</option>
407 <option value="Align">Align Spectra (warping/phase correction)</option> 422 <option value="Align">Align spectra (warping/phase correction)</option>
423 <option value="skip_preprocessing">Skip preprocessing</option>
408 <validator type="empty_field" /> 424 <validator type="empty_field" />
409 </param> 425 </param>
410 <when value="Transformation"> 426 <when value="Transformation">
411 <param name="transform_method" type="select" label="Select a transfprormation method"> 427 <param name="transform_method" type="select" label="Transformation method">
412 <option value="sqrt" selected="True">sqrt</option> 428 <option value="sqrt" selected="True">sqrt</option>
413 <option value="log">log</option> 429 <option value="log">log</option>
414 <option value="log2">log2</option> 430 <option value="log2">log2</option>
415 <option value="log10">log10</option> 431 <option value="log10">log10</option>
416 <validator type="empty_field" /> 432 <validator type="empty_field" />
417 </param> 433 </param>
418 </when> 434 </when>
419 <when value="Smoothing"> 435 <when value="Smoothing">
420 <conditional name="methods_for_smoothing"> 436 <conditional name="methods_for_smoothing">
421 <param name="smooth_method" type="select" label="This method smoothes the intensity values of a MassSpectrum object"> 437 <param name="smooth_method" type="select" label="Smoothing method" help="This method smoothes the intensity values of a MassSpectrum object.">
422 <option value="SavitzkyGolay" selected="True">SavitzkyGolay</option> 438 <option value="SavitzkyGolay" selected="True">SavitzkyGolay</option>
423 <option value="MovingAverage">MovingAverage</option> 439 <option value="MovingAverage">MovingAverage</option>
424 </param> 440 </param>
425 <when value="SavitzkyGolay"> 441 <when value="SavitzkyGolay">
426 <param name="polynomial" value="3" type="text" label="PolynomialOrder argument to control the order of the filter" 442 <param name="polynomial" value="3" type="text" label="Polynomial order"
427 help="should be smaller than the resulting window"/> 443 help="Controls the order of the filter, should be smaller than the resulting window."/>
428 </when> 444 </when>
429 <when value="MovingAverage"> 445 <when value="MovingAverage">
430 <param name="weighted" type="boolean" label="Weighted average" help = "Indicates if the average should be equal weight or if it should have weights depending on the distance from the center as calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weigths normalized to 1" truevalue="TRUE" falsevalue="FALSE"/> 446 <param name="weighted" type="boolean" label="Weighted average" help = "Indicates if the average should be equal weight or if it should have weights depending on the distance from the center as calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weigths normalized to 1" truevalue="TRUE" falsevalue="FALSE"/>
431 </when> 447 </when>
432 </conditional> 448 </conditional>
433 <param name="halfWindowSize" type="integer" value="10" 449 <param name="halfWindowSize" type="integer" value="10"
434 label="Half window size (number of data points)" 450 label="Half window size"
435 help="The resulting window reaches from 451 help="Number of data points, the resulting window reaches from
436 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] 452 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
437 (window size is 2*halfWindowSize+1)."/> 453 (window size is 2*halfWindowSize+1)."/>
438 </when> 454 </when>
439 <when value="Baseline"> 455 <when value="Baseline">
440 <conditional name="methods_for_baseline"> 456 <conditional name="methods_for_baseline">
449 <param name="iterations" type="integer" value="100" 465 <param name="iterations" type="integer" value="100"
450 label="Number of iterations" help="Corresponds to half window size: The resulting window reaches from mass[cur_index-iterations] to mass[cur_index+iterations]"/> 466 label="Number of iterations" help="Corresponds to half window size: The resulting window reaches from mass[cur_index-iterations] to mass[cur_index+iterations]"/>
451 </when> 467 </when>
452 <when value="TopHat"> 468 <when value="TopHat">
453 <param name="tophat_halfWindowSize" type="integer" value="10" 469 <param name="tophat_halfWindowSize" type="integer" value="10"
454 label="Half window size (number of data points)" 470 label="Half window size"
455 help="The resulting window reaches from 471 help="Number of data points, the resulting window reaches from
456 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> 472 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/>
457 </when> 473 </when>
458 <when value="ConvexHull"/> 474 <when value="ConvexHull"/>
459 <when value="median"> 475 <when value="median">
460 <param name="median_halfWindowSize" type="integer" value="10" 476 <param name="median_halfWindowSize" type="integer" value="10"
461 label="Half window size (number of data points)" 477 label="Half window size"
462 help="The resulting window reaches from 478 help="Number of data points, the resulting window reaches from
463 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> 479 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/>
464 </when> 480 </when>
465 </conditional> 481 </conditional>
466 </when> 482 </when>
467 <when value="Calibrate"> 483 <when value="Calibrate">
468 <param name="calibrate_method" type="select" label="Intensity calibration (normalization) method"> 484 <param name="calibrate_method" type="select" label="Intensity calibration method" help="Intensity normalization">
469 <option value="TIC" selected="True">TIC</option> 485 <option value="TIC" selected="True">TIC</option>
470 <option value="PQN">PQN</option> 486 <option value="PQN">PQN</option>
471 <option value="median">median</option> 487 <option value="median">median</option>
472 <validator type="empty_field" /> 488 <validator type="empty_field" />
473 </param> 489 </param>
474 <conditional name="cond_calibration_range"> 490 <conditional name="cond_calibration_range">
475 <param name="calibration_range" type="select" label="Instead of the whole m/z range, a specified m/z range can be used to calculate the scaling factor"> 491 <param name="calibration_range" type="select" label="m/z range" help="Instead of the whole m/z range, a specified m/z range can be used to calculate the scaling factor.">
476 <option value="no" selected="True">complete m/z range</option> 492 <option value="no" selected="True">complete m/z range</option>
477 <option value="yes">specify a m/z range</option> 493 <option value="yes">specify a m/z range</option>
478 </param> 494 </param>
479 <when value="no"/> 495 <when value="no"/>
480 <when value="yes"> 496 <when value="yes">
485 label="End of m/z range, has to be inside m/z range"/> 501 label="End of m/z range, has to be inside m/z range"/>
486 </when> 502 </when>
487 </conditional> 503 </conditional>
488 </when> 504 </when>
489 <when value="Align"> 505 <when value="Align">
490 <param name="warping_method" type="select" label="Warping methods"> 506 <param name="warping_method" type="select" label="Alignment method">
491 <option value="lowess" selected="True">Lowess</option> 507 <option value="lowess" selected="True">Lowess</option>
492 <option value="linear">Linear</option> 508 <option value="linear">Linear</option>
493 <option value="quadratic">Quadratic</option> 509 <option value="quadratic">Quadratic</option>
494 <option value="cubic">Cubic</option> 510 <option value="cubic">Cubic</option>
495 </param> 511 </param>
496 512
497 <param name="tolerance" type="float" value="0.00005" 513 <param name="tolerance" type="float" value="0.00005"
498 label="Tolerance = abs(mz1 - mz2)/mz2" 514 label="Tolerance"
499 help="Maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 50e-6" /> 515 help="abs(mz1 - mz2)/mz2, maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 5e-5" />
500 516
501 <param name="halfWindowSize" type="integer" value="20" 517 <param name="halfWindowSize" type="integer" value="20"
502 label="Half window size (number of data points)" 518 label="Half window size"
503 help="The resulting window reaches from 519 help="Number of data points, the resulting window reaches from
504 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] 520 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
505 (window size is 2*halfWindowSize+1). 521 (window size is 2*halfWindowSize+1).
506 The best size differs depending on the selected smoothing method."/> 522 The best size differs depending on the selected smoothing method."/>
507 523
524 <param name="peak_method" type="select" label="Noise estimation function">
525 <option value="MAD" selected="True">MAD</option>
526 <option value="SuperSmoother">SuperSmoother</option>
527 </param>
528
508 <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio"/> 529 <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio"/>
509 <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/> 530 <param name="allow_nomatch" type="boolean" label="Allow no matches" help="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/>
510 <param name="empty_nomatch" type="boolean" label="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/> 531 <param name="empty_nomatch" type="boolean" label="Empty no matches" help="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/>
511 <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/> 532 <param name="remove_empty" type="boolean" label="Remove empty spectra" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/>
512 533
513 <conditional name="reference_for_alignment"> 534 <conditional name="reference_for_alignment">
514 <param name="align_ref" type="select" label="Reference to which the samples should be aligned" help="Use internal calibrants to perform m/z calibration"> 535 <param name="align_ref" type="select" label="Reference" help="If given, samples will be aligned to reference, use internal calibrants to perform m/z calibration">
515 <option value="no_reference" selected="True">no reference</option> 536 <option value="no_reference" selected="True">no reference</option>
516 <option value="yes_reference">reference from tabular file</option> 537 <option value="yes_reference">reference from tabular file</option>
517 </param> 538 </param>
518 <when value="no_reference"/> 539 <when value="no_reference">
540 <param name="min_frequency" type="float" value="0.9" label = "minFrequency" help="Peaks which occur in less than this fraction (minFrequency) of the spectra are removed before the reference m/z are generated"/>
541 </when>
519 <when value="yes_reference"> 542 <when value="yes_reference">
520 <param name="reference_file" type="data" format="tabular" 543 <param name="reference_file" type="data" format="tabular"
521 label="Tabular file with m/z (MassPeaks) which should be used for spectra alignment" 544 label="Reference m/z values"
522 help="At least 2 reference m/z per spectrum are needed"/> 545 help="Tabular file"/>
546 <param name="mz_column" data_ref="reference_file" label="Column with m/z values" type="data_column"/>
523 <param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> 547 <param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
548 <param name="separate_alignment" type="boolean" label="Spectrum wise alignment" help="Internal binning is omitted to avoid interaction between spectra" truevalue="TRUE" falsevalue="FALSE"/>
524 </when> 549 </when>
525 </conditional> 550 </conditional>
526 </when> 551 </when>
552 <when value="skip_preprocessing"/>
527 </conditional> 553 </conditional>
528 </repeat> 554 </repeat>
529 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" truevalue="TRUE" falsevalue="FALSE"/> 555 <param name="export_processed" type="boolean" label="Export processed imzML" help="otherwise continuous imzML will be exported" truevalue="TRUE" falsevalue="FALSE"/>
530 </inputs> 556 </inputs>
531 <outputs> 557 <outputs>
532 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}" /> 558 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}" />
533 <data format="pdf" name="plots" from_work_dir="prepro_qc_plot.pdf" label="${tool.name} on ${on_string}: QC"/> 559 <data format="pdf" name="plots" from_work_dir="prepro_qc_plot.pdf" label="${tool.name} on ${on_string}: QC"/>
534 </outputs> 560 </outputs>
539 <composite_data value="Example_Continuous.ibd"/> 565 <composite_data value="Example_Continuous.ibd"/>
540 </param> 566 </param>
541 <conditional name="restriction_conditional"> 567 <conditional name="restriction_conditional">
542 <param name="restriction" value="restrict"/> 568 <param name="restriction" value="restrict"/>
543 <param name="coordinates_file" value="restricted_pixels.tabular"/> 569 <param name="coordinates_file" value="restricted_pixels.tabular"/>
570 <param name="column_x" value="1"/>
571 <param name="column_y" value="2"/>
544 </conditional> 572 </conditional>
545 <conditional name="methods_conditional"> 573 <conditional name="methods_conditional">
546 <param name="method" value="Transformation"/> 574 <param name="method" value="Transformation"/>
547 <param name="transform_method" value="log2"/> 575 <param name="transform_method" value="log2"/>
548 <param name="method" value="Smoothing"/> 576 <param name="method" value="Smoothing"/>
549 <param name="smooth_method" value="SavitzkyGolay"/> 577 <param name="smooth_method" value="SavitzkyGolay"/>
550 <param name="method" value="Baseline"/> 578 <param name="method" value="Baseline"/>
551 <param name="baseline_method" value ="TopHat"/> 579 <param name="baseline_method" value ="TopHat"/>
552 </conditional> 580 </conditional>
553 <output name="outfile_imzml" file="outfile1.imzML" compare="sim_size"/> 581 <output name="outfile_imzml" ftype="imzml" file="preprocessing1.imzml.txt" lines_diff="4">
554 <output name="outfile_imzml" file="outfile1.ibd" compare="sim_size"/> 582 <extra_files type="file" file="outfile1.imzml" name="imzml" lines_diff="6"/>
583 <extra_files type="file" file="outfile1.ibd" name="ibd" compare="sim_size"/>
584 </output>
555 <output name="plots" file="Preprocessing1_QC.pdf" compare="sim_size"/> 585 <output name="plots" file="Preprocessing1_QC.pdf" compare="sim_size"/>
556 </test> 586 </test>
557 <test> 587 <test>
558 <param name="infile" value="msidata_1.RData" ftype="rdata"/> 588 <param name="infile" value="msidata_1.RData" ftype="rdata"/>
559 <conditional name="methods_conditional"> 589 <conditional name="methods_conditional">
560 <param name="method" value="Calibrate"/> 590 <param name="method" value="Align"/>
561 <param name="calibrate_method" value="PQN"/> 591 <param name="warping_method" value="lowess"/>
592 <param name="halfWindowSize" value="5"/>
593 <param name="tolerance" value="0.001"/>
594 <param name="allow_nomatch" value="TRUE"/>
595 <param name="remove_empty" value="TRUE"/>
596 <param name="empty_nomatch" value="TRUE"/>
597 <conditional name="reference_for_alignment">
598 <param name="align_ref" value="yes_reference"/>
599 <param name="reference_file" value="inputpeptides.tabular" ftype="tabular"/>
600 </conditional>
562 </conditional> 601 </conditional>
563 <output name="outfile_imzml" file="outfile2.imzML" compare="sim_size"/> 602 <output name="outfile_imzml" ftype="imzml" file="preprocessing2.imzml.txt" lines_diff="4">
564 <output name="outfile_imzml" file="outfile2.ibd" compare="sim_size"/> 603 <extra_files type="file" file="outfile2.imzml" name="imzml" lines_diff="6"/>
604 <extra_files type="file" file="outfile2.ibd" name="ibd" compare="sim_size"/>
605 </output>
565 <output name="plots" file="Preprocessing2_QC.pdf" compare="sim_size"/> 606 <output name="plots" file="Preprocessing2_QC.pdf" compare="sim_size"/>
566 </test> 607 </test>
567 <test> 608 <test>
568 <param name="infile" value="" ftype="imzml"> 609 <param name="infile" value="" ftype="imzml">
569 <composite_data value="Example_Continuous.imzML"/> 610 <composite_data value="Example_Continuous.imzML"/>
570 <composite_data value="Example_Continuous.ibd"/> 611 <composite_data value="Example_Continuous.ibd"/>
571 </param> 612 </param>
572 <conditional name="tabular_annotation"> 613 <conditional name="methods_conditional">
573 <param name="load_annotation" value="yes_annotation"/> 614 <param name="method" value="Calibrate"/>
574 <param name="annotation_file" value="pixel_annotations.tabular"/> 615 <param name="calibrate_method" value="median"/>
575 <param name="column_x" value="1"/>
576 <param name="column_y" value="2"/>
577 <param name="column_names" value="3"/>
578 <param name="tabular_header" value="TRUE"/>
579 </conditional> 616 </conditional>
580 <conditional name="methods_conditional"> 617 <output name="outfile_imzml" ftype="imzml" file="preprocessing3.imzml.txt" lines_diff="4">
581 <param name="method" value="Align"/> 618 <extra_files type="file" file="outfile3.imzml" name="imzml" lines_diff="6"/>
582 <param name="warping_method" value="linear"/> 619 <extra_files type="file" file="outfile3.ibd" name="ibd" compare="sim_size"/>
583 <param name="halfWindowSize" value="1"/> 620 </output>
584 <param name="tolerance" value="0.002"/>
585 <param name="allow_nomatch" value="TRUE"/>
586 <param name="remove_empty" value="TRUE"/>
587 <param name="empty_nomatch" value="TRUE"/>
588 <conditional name="reference_for_alignment">
589 <param name="align_ref" value="yes_reference"/>
590 <param name="reference_file" value="align_reference_test2.tabular" ftype="tabular"/>
591 </conditional>
592 </conditional>
593 <output name="outfile_imzml" file="outfile3.imzML" compare="sim_size"/>
594 <output name="outfile_imzml" file="outfile3.ibd" compare="sim_size"/>
595 <output name="plots" file="Preprocessing3_QC.pdf" compare="sim_size"/> 621 <output name="plots" file="Preprocessing3_QC.pdf" compare="sim_size"/>
596 </test> 622 </test>
597 </tests> 623 </tests>
598 <help><![CDATA[ 624 <help><![CDATA[
599 625
605 631
606 - MSI data: 2 types of input data can be used: 632 - MSI data: 2 types of input data can be used:
607 633
608 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ 634 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
609 - Cardinal "MSImageSet" data saved as .RData 635 - Cardinal "MSImageSet" data saved as .RData
610 - Only for Cardinal RData files and when remove empty spectra is chosen: Tabular file with coordinates annotations. Separate columns for x and y coordinates and a third column with pixel annotations. Tabular files with any header name or no header at all are supported 636
611 - Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. The file has to contain x values in the first column and y values in the second columns. Further columns are allowed. Tabular files with any header name or no header at all are supported. 637 - Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. Tabular files with any header name or no header at all are supported.
612 638
613 :: 639 ::
614 640
615 x_coord y_coord 641 x_coord y_coord
616 1 1 642 1 1
636 **Options** 662 **Options**
637 663
638 - Transformation: Variance stabilization through intensity transformation: 'log', 'log2', 'log10' and 'squareroot' (sqrt) are available 664 - Transformation: Variance stabilization through intensity transformation: 'log', 'log2', 'log10' and 'squareroot' (sqrt) are available
639 - Smoothing: Smoothing of the peaks reduces noise and improves peak detection. Available smoothing methods are 'SavitzkyGolay' and 'Moving Average' 665 - Smoothing: Smoothing of the peaks reduces noise and improves peak detection. Available smoothing methods are 'SavitzkyGolay' and 'Moving Average'
640 666
641 - For all smoothing methods: The larger the 'Half window size'f, the stronger the smoothing. The resulting window should be smaller than the FWHM (full width at half maximum) of the typical peaks. Moving average needs smaller window size than SavitzkyGolay. 667 - For all smoothing methods: The larger the 'Half window size', the stronger the smoothing. The resulting window should be smaller than the FWHM (full width at half maximum) of the typical peaks. Moving average needs smaller window size than SavitzkyGolay.
642 - Moving average: Recommended for broader peaks/high m/z range spectra. Weighted moving average: Points in the center get larger weight factors than points away from the center. 668 - Moving average: Recommended for broader peaks/high m/z range spectra. Weighted moving average: Points in the center get larger weight factors than points away from the center.
643 - SavitzkyGolay: Recommended for sharp peaks/low m/z range, preserves the shape of the local maxima. The PolynomialOrder should be smaller than the resulting window. Negative values will be replaced with 0. 669 - SavitzkyGolay: Recommended for sharp peaks/low m/z range, preserves the shape of the local maxima. The PolynomialOrder should be smaller than the resulting window. Negative values will be replaced with 0.
644 670
645 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets). 671 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets).
646 672
653 - Intensity calibration (normalization): Normalization of intensities to Total Ion Current (TIC), median spectrum, Probabilistic Quotient Normalization (PQN) 679 - Intensity calibration (normalization): Normalization of intensities to Total Ion Current (TIC), median spectrum, Probabilistic Quotient Normalization (PQN)
654 680
655 - TIC and median are local calibration methods: each spectrum is normalized on its own (each peak is divided by the TIC or median of the spectrum) 681 - TIC and median are local calibration methods: each spectrum is normalized on its own (each peak is divided by the TIC or median of the spectrum)
656 - PQN is a global calibration method: In PQN all spectra are calibrated using the TIC calibration first. Subsequently, a median reference spectrum is created and the intensities in all spectra are standardized using the reference spectrum and a spectrum-specific median is calculated for each spectrum. Finally, each spectrum is rescaled by the median of the ratios of its intensity values and that of the reference spectrum 682 - PQN is a global calibration method: In PQN all spectra are calibrated using the TIC calibration first. Subsequently, a median reference spectrum is created and the intensities in all spectra are standardized using the reference spectrum and a spectrum-specific median is calculated for each spectrum. Finally, each spectrum is rescaled by the median of the ratios of its intensity values and that of the reference spectrum
657 683
658 - Spectra alignment (warping): alignment for (re)calibration of m/z values, at least two m/z per spectrum are needed for the alignment. This requirement can be skipped by setting "Don't throw an error when less than 2 reference m/z were found in a spectrum" to yes. If the not aligned spectra should be set to zero select yes in "logical, if TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero". In order to remove such empty spectra set "Should empty spectra be removed" to yes. 684 - Spectra alignment (warping): alignment for (re)calibration of m/z values.
685
686 - peak detection is performed, the reference peaks will be matched to those detected peaks
687 - without external reference m/z: internal reference is obtained by filtering and binning the picked peaks to find landmark peaks and their average m/z
688 - with external reference m/z: the given m/z are used as a reference, at least 10 reference values are recommended
689 - non-linear warping (parametric time warping plus binning) to match the reference peaks (internal or external) to the present (picked) peaks with the given tolerance. At least two m/z per spectrum are needed for the alignment. To prevent an error when this criterion is not fulfilled, set "Allow no matches" (don't throw an error when fewer than 2 reference m/z were found in a spectrum) to yes. To set the intensity values of spectra that could not be aligned (missing warping function) to zero, set "Empty no matches" to yes. To remove such empty spectra, set "Remove empty spectra" to yes.
659 690
660 691
661 **Output** 692 **Output**
662 693
663 - imzML file (imzML format can be continuous or processed) 694 - imzML file (imzML format can be continuous or processed)
667 698
668 ]]> 699 ]]>
669 </help> 700 </help>
670 <expand macro="citation"/> 701 <expand macro="citation"/>
671 </tool> 702 </tool>
703