comparison filtering.xml @ 2:0c4579390f73 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f127be2141cf22e269c85282d226eb16fe14a9c1
author galaxyp
date Fri, 15 Feb 2019 10:15:15 -0500
parents aac805a9d2ae
children 58376f5a6319
comparison
equal deleted inserted replaced
1:aac805a9d2ae 2:0c4579390f73
1 <tool id="cardinal_filtering" name="MSI filtering" version="@VERSION@.1"> 1 <tool id="cardinal_filtering" name="MSI filtering" version="@VERSION@.2">
2 <description>tool for filtering mass spectrometry imaging data</description> 2 <description>tool for filtering mass spectrometry imaging data</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"> 6 <expand macro="requirements">
13 13
14 @INPUT_LINKING@ 14 @INPUT_LINKING@
15 cat '${MSI_subsetting}' && 15 cat '${MSI_subsetting}' &&
16 Rscript '${MSI_subsetting}' && 16 Rscript '${MSI_subsetting}' &&
17 17
18 #if $imzml_output: 18 #if str($imzml_output) == "imzml_format":
19 mkdir $outfile_imzml.files_path && 19 mkdir $outfile_imzml.files_path &&
20 ls -l &&
21 mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && 20 mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
22 mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && 21 mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
23 #end if 22 #end if
24 echo "imzML file:" > $outfile_imzml && 23 echo "imzML file:" > $outfile_imzml &&
25 ls -l "$outfile_imzml.files_path" >> $outfile_imzml 24 ls -l "$outfile_imzml.files_path" >> $outfile_imzml
26
27
28 ]]> 25 ]]>
29 </command> 26 </command>
30 27
31 28
32 <configfiles> 29 <configfiles>
56 minimumx = min(coord(msidata)[,1]) 53 minimumx = min(coord(msidata)[,1])
57 maximumx = max(coord(msidata)[,1]) 54 maximumx = max(coord(msidata)[,1])
58 ## Range y coordinates 55 ## Range y coordinates
59 minimumy = min(coord(msidata)[,2]) 56 minimumy = min(coord(msidata)[,2])
60 maximumy = max(coord(msidata)[,2]) 57 maximumy = max(coord(msidata)[,2])
61 ## Number of intensities > 0
62 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE)
63 ## Spectra multiplied with m/z (potential number of peaks)
64 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
65 ## Percentage of intensities > 0
66 percpeaks = round(npeaks/numpeaks*100, digits=2)
67 ## Number of empty TICs
68 TICs = colSums(spectra(msidata)[], na.rm=TRUE)
69 NumemptyTIC = sum(TICs == 0)
70 ## median TIC
71 medint = round(median(TICs), digits=2)
72 ## Store features for QC plot 58 ## Store features for QC plot
73 featuresinfile = mz(msidata) 59 featuresinfile = mz(msidata)
74 60
75 ## Next steps will only run if there are more than 0 intensities/pixels/features in the file 61 ## Next steps will only run if there are more than 0 pixels/features in the file
76 62
77 if (sum(spectra(msidata)[]>0, na.rm=TRUE) > 0) 63 if (ncol(msidata)>0 & nrow(msidata) >0)
78 { 64 {
79
80
81 ## prepare dataframe for QC of pixel distribution (will be overwritten in filtering of pixels condition)
82 position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata)))
83 colnames(position_df)[3] = "annotation"
84
85 ###################################### Filtering of pixels ##################### 65 ###################################### Filtering of pixels #####################
86 ################################################################################ 66 ################################################################################
87 67
88 ############ Pixels in two columns format: x and y in different columns ############# 68 ############ Pixels in two columns format: x and y in different columns #############
89 69
91 print("two columns") 71 print("two columns")
92 72
93 ## read tabular file 73 ## read tabular file
94 input_list = read.delim("$pixels_cond.annotation_file", header = $pixels_cond.tabular_header, 74 input_list = read.delim("$pixels_cond.annotation_file", header = $pixels_cond.tabular_header,
95 stringsAsFactors = FALSE) 75 stringsAsFactors = FALSE)
76 inputpixels = input_list[,c($pixels_cond.column_x, $pixels_cond.column_y)]
77 input_pixels = paste(inputpixels[,1], inputpixels[,2], sep="_")
78 dataset_pixels = paste(coord(msidata)\$x, coord(msidata)\$y, sep="_")
79 pixelsofinterest = dataset_pixels %in% input_pixels
80
81 tryCatch(
82 {
83 msidata = msidata[,pixelsofinterest]
84 if (ncol(msidata) == 0)
85 {
86 stop(call.=FALSE)
87 }
88 },
89 error=function(cond) {
90 ## in case all coordinates were outside the dataset leading to zero pixels, tool is stopped to avoid continuing with wrong data
91 message("Error during pixel filtering")
92 message("Possible problems: Forgot to set 'Tabular file contains a header line' = Yes, wrong columns selected, columns with coordinates contain empty fields or letters, all coordinates were outside the range of the dataset - this can be checked with the 'MSI qualitycontrol' tool)")
93 stop(call.=FALSE)
94 }
95 )
96
97 ## QC values:
96 numberpixels = nrow(input_list) 98 numberpixels = nrow(input_list)
97 inputpixels = input_list[,c($pixels_cond.column_x, $pixels_cond.column_y, $pixels_cond.column_names)]
98
99 ## rewrite into x = 1, y = 1 format and filter msidata, count validpixels
100 pixelvector = character()
101 for (pixel in 1:nrow(inputpixels)){
102 pixelvector[pixel] = paste0("x = ", inputpixels[pixel,1],", ", "y = ", inputpixels[pixel,2])}
103 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector]
104 msidata = msidata[,pixelsofinterest]
105 validpixels=ncol(msidata) 99 validpixels=ncol(msidata)
106 100
107 ## in case some pixels are left print annotation plot 101 ########### Pixels within x and y minima and maxima are kept #################
108 colnames(inputpixels) = c("x", "y", "annotation")
109 position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE)
110 colnames(position_df)[3] = "annotation"
111 position_df\$annotation = factor(position_df\$annotation)
112
113
114 ########### Pixels within x and y minima and maxima are kept ###################
115 102
116 #elif str($pixels_cond.pixel_filtering) == "pixel_range": 103 #elif str($pixels_cond.pixel_filtering) == "pixel_range":
117 print("pixel range") 104 print("pixel range")
118 105
106 ## QC values:
119 numberpixels = "range" 107 numberpixels = "range"
120 validpixels = "range" 108 validpixels = "range"
121 109
122 ## only filter pixels if at least one pixel will be left 110 tryCatch(
123 if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){ 111 {
124 112 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range]
125 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] 113 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range]
126 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range] 114 if (ncol(msidata) == 0)
127 }else{ 115 {
128 116 stop(call.=FALSE)
129 print("no valid pixel found") 117 }
130 msidata = msidata[,0]} 118 },
131 119 error=function(cond) {
132 ## update position_df for filtered pixels 120 ## in case one of the ranges was outside the dataset leading to zero pixels, tool is stopped to avoid continuing with wrong data
133 position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata))) 121 message("Error during pixel filtering")
134 colnames(position_df)[3] = "annotation" 122 message("Check that both x and y ranges were inside the dataset coordinates (can be checked with the 'MSI qualitycontrol' tool) or if any not numeric character was entered into the input fields")
135 position_df\$annotation = factor(position_df\$annotation) 123 stop(call.=FALSE)
124 }
125 )
126
127 ######################## no pixel filtering ################################
136 128
137 #elif str($pixels_cond.pixel_filtering) == "none": 129 #elif str($pixels_cond.pixel_filtering) == "none":
138 print("no pixel filtering") 130 print("no pixel filtering")
139 131
132 ## QC values:
140 numberpixels = 0 133 numberpixels = 0
141 validpixels = 0 134 validpixels = 0
142 135
143 #end if 136 #end if
137
138 ############################# QC data #####################################
139
140 ## dataframe for QC of pixel distribution
141 position_df = cbind(coord(msidata)[,1:2], rep("remaining pixels", times=ncol(msidata)))
142 colnames(position_df)[3] = "annotation"
143 position_df\$annotation = factor(position_df\$annotation)
144 gc()
144 145
145 }else{ 146 }else{
146 print("Inputfile has no intensities > 0") 147 print("Inputfile has no intensities > 0")
147 } 148 }
148 149
149 ################################# filtering of features ###################### 150 ################################# filtering of features ######################
150 ############################################################################## 151 ##############################################################################
151 152
152 ####################### Keep m/z from tabular file ######################### 153 ####################### Keep m/z from tabular file #########################
153 154
154 ## feature filtering only when pixels/features/intensities are left 155 ## feature filtering only when pixels/features/intensities are left
155 156
157
156 if (ncol(msidata) > 0){ 158 if (ncol(msidata) > 0){
157 npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE) 159 if (nrow(msidata) > 0)
158 if (npeaks_before_filtering > 0)
159 { 160 {
160 161
161 #if str($features_cond.features_filtering) == "features_list": 162 #if str($features_cond.features_filtering) == "features_list":
162 print("feature list") 163 print("feature list")
163 164
164 ## read tabular file, define starting row, extract and count valid features 165 ## read tabular file, define starting row, extract and count valid features
165 input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE) 166 input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE)
166 extracted_features = input_features[,$features_cond.feature_column] 167 extracted_features = input_features[,$features_cond.feature_column]
167 numberfeatures = length(extracted_features) 168 numberfeatures = length(extracted_features)
169
168 if (class(extracted_features) == "numeric"){ 170 if (class(extracted_features) == "numeric"){
169 ### max digits given in the input file will be used to match m/z but the maximum is 4 171 ### max digits given in the input file will be used to match m/z but the maximum is 4
170 max_digits = max(nchar(sapply(strsplit(as.character(extracted_features), "\\."),`[`,2)), na.rm=TRUE) 172 max_digits = max(nchar(sapply(strsplit(as.character(extracted_features), "\\."),`[`,2)), na.rm=TRUE)
171 173
172 if (max_digits >4) 174 if (max_digits >4)
175 } 177 }
176 178
177 validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits) 179 validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits)
178 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)] 180 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)]
179 validmz = length(unique(featuresofinterest)) 181 validmz = length(unique(featuresofinterest))
182
180 }else{ 183 }else{
181 validmz = 0 184 validmz = 0
182 featuresofinterest = 0} 185 featuresofinterest = 0}
183 186
184 ### filter msidata for valid features 187 ### filter msidata for valid features
185 msidata = msidata[featuresofinterest,] 188
189 tryCatch(
190 {
191 msidata = msidata[featuresofinterest,]
192 ## does not throw error when processed file has no features left, therefore create error to avoid continuing with wrong data
193 if (nrow(msidata) == 0)
194 {
195 stop(call.=FALSE)
196 }
197 },
198 error=function(cond) {
199 ## in case all provided m/z values were outside the m/z range
200 ## tool is stopped to avoid continuing with wrong data
201 message("Error during m/z filtering")
202 message("Possible problems: Forgot to set 'Tabular file contains a header line' = Yes, wrong columns selected, column with m/z features contains empty fields or letters, all m/z features s were outside the range of the dataset - this can be checked with the 'MSI qualitycontrol' tool) or did not match any m/z feature of the dataset (see help section for more information on that)")
203 stop(call.=FALSE)
204 }
205 )
206
186 207
187 ############### features within a given range are kept ##################### 208 ############### features within a given range are kept #####################
188 209
189 #elif str($features_cond.features_filtering) == "features_range": 210 #elif str($features_cond.features_filtering) == "features_range":
190 print("feature range") 211 print("feature range")
191 212
192 numberfeatures = "range" 213 numberfeatures = "range"
193 validmz = "range" 214 validmz = "range"
194 215
195 if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){ 216 tryCatch(
196 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,] 217 {
197 }else{ 218 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,]
198 msidata = msidata[0,] 219 ## does not throw error when processed file has no features left, therefore create error to avoid continuing with wrong data
199 print("no valid mz range")} 220 if (nrow(msidata) == 0)
221 {
222 stop(call.=FALSE)
223 }
224 },
225 error=function(cond) {
226 ## in case all m/z features were outside the dataset leading to zero m/z features, tool is stopped to avoid continuing with wrong data
227 message("Error during m/z filtering")
228 message("Check that the entered m/z range is inside the dataset coordinates (can be checked with the 'MSI qualitycontrol' tool) or if any not numeric character was entered into the input fields")
229 stop(call.=FALSE)
230 }
231 )
200 232
201 ############### Remove m/z from tabular file ######################### 233 ############### Remove m/z from tabular file #########################
202 234
203 #elif str($features_cond.features_filtering) == "remove_features": 235 #elif str($features_cond.features_filtering) == "remove_features":
204 print("remove features") 236 print("remove features")
205 237
206 ## read tabular file, define starting row, extract and count valid features 238 ## read tabular file, define starting row, extract and count valid features
207 input_features = read.delim("$mz_tabular", header = $features_cond.removal_header, stringsAsFactors = FALSE) 239 input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE)
208 extracted_features = input_features[,$features_cond.removal_column] 240 extracted_features = input_features[,$features_cond.feature_column]
209 numberfeatures = length(extracted_features) 241 numberfeatures = length(extracted_features)
210 if (class(extracted_features) == "numeric"){ 242 if (class(extracted_features) == "numeric"){
211 print("input is numeric") 243 print("input is numeric")
212 featuresofinterest = extracted_features 244 featuresofinterest = extracted_features
213 validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata))) 245 }else{featuresofinterest = 0}
214 }else{featuresofinterest = 0
215 validmz = 0}
216 246
217 ### Here starts removal of features: 247 ### Here starts removal of features:
218 plusminus = $features_cond.removal_plusminus 248 plusminus = $features_cond.removal_plusminus
219 249
220 mass_to_remove = numeric() 250 tryCatch(
221 if (sum(featuresofinterest) > 0){ 251 {
222 for (masses in featuresofinterest){ 252 mass_to_remove = numeric()
223 #if str($features_cond.units_removal) == "ppm": 253 for (masses in featuresofinterest){
224 plusminus = masses * $features_cond.removal_plusminus/1000000 254 #if str($features_cond.units_removal) == "ppm":
225 #end if 255 plusminus = masses * $features_cond.removal_plusminus/1000000
226 current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus)) 256 #end if
227 mass_to_remove = append(mass_to_remove, current_mass)} 257 current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus))
228 msidata= msidata[-mass_to_remove, ] 258 mass_to_remove = append(mass_to_remove, current_mass)}
229 }else{print("No features were removed as they were not fitting to m/z values and/or range")} 259 msidata= msidata[-mass_to_remove, ]
230 260 validmz = numberfeatures - nrow(msidata)
261 ## does not throw error when processed file has no features left, therefore create error to avoid continuing with wrong data
262 if (nrow(msidata) == 0)
263 {
264 stop(call.=FALSE)
265 }
266 },
267 error=function(cond) {
268 message("Error during removal of m/z features")
269 stop(call.=FALSE)
270 }
271 )
272
273
274
275
276 ######################## No m/z filtering ##############################
231 277
232 #elif str($features_cond.features_filtering) == "none": 278 #elif str($features_cond.features_filtering) == "none":
233 279
234 print("no feature filtering") 280 print("no feature filtering")
235 validmz = 0 281 validmz = 0
236 numberfeatures = 0 282 numberfeatures = 0
237 283
238 #end if 284 #end if
239 285
240 ## save msidata as Rfile
241 save(msidata, file="$msidata_filtered")
242
243 ## Number of empty TICs
244 TICs2 = colSums(spectra(msidata)[], na.rm=TRUE)
245 ## Number of intensities > 0
246 npeaks2= sum(spectra(msidata)[]>0, na.rm=TRUE)
247 ## Spectra multiplied with m/z (potential number of peaks)
248 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
249
250
251
252 }else{ 286 }else{
253 print("Inputfile or file filtered for pixels has no intensities > 0") 287 print("Inputfile has no m/z features")
254 numberfeatures = NA 288 numberfeatures = NA
255 validmz = NA 289 validmz = NA
256 ## Number of empty TICs
257 TICs2 = 0
258 npeaks2 = 0
259 numpeaks2 = 0
260 } 290 }
261 }else{ 291 }else{
262 print("Inputfile or file filtered for pixels has no pixels left") 292 print("Inputfile or file filtered for pixels has no pixels")
263 numberfeatures = NA 293 numberfeatures = NA
264 validmz = NA 294 validmz = NA
265 ## Number of empty TICs
266 TICs2 = 0
267 npeaks2 = 0
268 numpeaks2 = 0
269 } 295 }
270 #################### QC numbers ####################### 296 gc()
271 297
272 298 #################### QC numbers #######################
273 ## Number of features (m/z) 299
274 maxfeatures2 = length(features(msidata)) 300 ## Number of features (m/z)
275 ## Range m/z 301 maxfeatures2 = length(features(msidata))
276 minmz2 = round(min(mz(msidata)), digits=2) 302 ## Range m/z
277 maxmz2 = round(max(mz(msidata)), digits=2) 303 minmz2 = round(min(mz(msidata)), digits=2)
278 ## Number of spectra (pixels) 304 maxmz2 = round(max(mz(msidata)), digits=2)
279 pixelcount2 = length(pixels(msidata)) 305 ## Number of spectra (pixels)
280 ## Range x coordinates 306 pixelcount2 = length(pixels(msidata))
281 minimumx2 = min(coord(msidata)[,1]) 307 ## Range x coordinates
282 maximumx2 = max(coord(msidata)[,1]) 308 minimumx2 = min(coord(msidata)[,1])
283 ## Range y coordinates 309 maximumx2 = max(coord(msidata)[,1])
284 minimumy2 = min(coord(msidata)[,2]) 310 ## Range y coordinates
285 maximumy2 = max(coord(msidata)[,2]) 311 minimumy2 = min(coord(msidata)[,2])
286 312 maximumy2 = max(coord(msidata)[,2])
287 ## Percentage of intensities > 0 313
288 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) 314 properties = c("Number of m/z features",
289 ## Number of empty TICs 315 "Range of m/z values",
290 NumemptyTIC2 = sum(TICs2 == 0) 316 "Number of pixels",
291 ## median TIC 317 "Range of x coordinates",
292 medint2 = round(median(TICs2), digits=2) 318 "Range of y coordinates",
293 319 "pixel overview",
294 properties = c("Number of m/z features", 320 "feature overview")
295 "Range of m/z values", 321
296 "Number of pixels", 322 before = c(paste0(maxfeatures),
297 "Range of x coordinates", 323 paste0(minmz, " - ", maxmz),
298 "Range of y coordinates", 324 paste0(pixelcount),
299 "Intensities > 0", 325 paste0(minimumx, " - ", maximumx),
300 "Median TIC per pixel", 326 paste0(minimumy, " - ", maximumy),
301 "Number of empty spectra", 327 paste0("input pixels: ", numberpixels),
302 "pixel overview", 328 paste0("input mz: ", numberfeatures))
303 "feature overview") 329
304 330 filtered = c(paste0(maxfeatures2),
305 before = c(paste0(maxfeatures), 331 paste0(minmz2, " - ", maxmz2),
306 paste0(minmz, " - ", maxmz), 332 paste0(pixelcount2),
307 paste0(pixelcount), 333 paste0(minimumx2, " - ", maximumx2),
308 paste0(minimumx, " - ", maximumx), 334 paste0(minimumy2, " - ", maximumy2),
309 paste0(minimumy, " - ", maximumy), 335 paste0("valid pixels: ", validpixels),
310 paste0(percpeaks, " %"), 336 paste0("valid mz: ", validmz))
311 paste0(medint), 337
312 paste0(NumemptyTIC), 338 property_df = data.frame(properties, before, filtered)
313 paste0("input pixels: ", numberpixels), 339
314 paste0("input mz: ", numberfeatures)) 340 ########################### PDF QC and MSI output ###########################
315 341
316 filtered = c(paste0(maxfeatures2), 342 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12)
317 paste0(minmz2, " - ", maxmz2), 343 plot(0,type='n',axes=FALSE,ann=FALSE)
318 paste0(pixelcount2), 344 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name"))
319 paste0(minimumx2, " - ", maximumx2), 345 grid.table(property_df, rows= NULL)
320 paste0(minimumy2, " - ", maximumy2), 346
321 paste0(percpeaks2, " %"), 347 ## QC report only when pixels/features are left
322 paste0(medint2), 348 if (ncol(msidata)>0 & nrow(msidata) >0)
323 paste0(NumemptyTIC2),
324 paste0("valid pixels: ", validpixels),
325 paste0("valid mz: ", validmz))
326
327 property_df = data.frame(properties, before, filtered)
328 print(property_df)
329
330 ########################### PDF QC and imzml output ###########################
331
332 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12)
333 plot(0,type='n',axes=FALSE,ann=FALSE)
334 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name"))
335 grid.table(property_df, rows= NULL)
336
337 ## QC report with more than value-table: only when pixels/features/intensities are left
338 if (npeaks2 > 0)
339 { 349 {
340 350
351 ### visual pixel control
352
353 pixel_image = ggplot(position_df, aes(x=x, y=y, fill=annotation))+
354 geom_tile(height = 1, width=1)+
355 coord_fixed()+
356 ggtitle("Spatial orientation of filtered pixels")+
357 theme_bw()+
358 theme(plot.title = element_text(hjust = 0.5))+
359 theme(legend.position="bottom",legend.direction="vertical")
360 print(pixel_image)
361
362 ### plot features which are removed
363 hist(mz(msidata), xlab="m/z", main="Kept m/z values")
364 #if str($features_cond.features_filtering) == "none":
365 print("no difference histogram as no m/z filtering took place")
366 #else:
367 if (isTRUE(all.equal(featuresinfile, mz(msidata)))){
368 print("No difference in m/z values before and after filtering, no histogram drawn")
369 }else{
370 hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")}
371 #end if
372
373 dev.off()
374
341 ## save msidata as imzML file, will only work if there is at least 1 m/z left 375 ## save msidata as imzML file, will only work if there is at least 1 m/z left
342 #if $imzml_output: 376
377 #if str($imzml_output) == "imzml_format":
343 if (maxfeatures2 > 0){ 378 if (maxfeatures2 > 0){
379 ## make sure that coordinates are integers
380 coord(msidata)\$y = as.integer(coord(msidata)\$y)
381 coord(msidata)\$x = as.integer(coord(msidata)\$x)
344 writeImzML(msidata, "out")} 382 writeImzML(msidata, "out")}
383 #elif str($imzml_output) == "rdata_format":
384 ## save msidata as Rfile
385 iData(msidata) = iData(msidata)[]
386 save(msidata, file="$outfile_rdata")
345 #end if 387 #end if
346
347
348 ### visual pixel control
349
350 levels(position_df\$annotation) = factor(paste(1:length(levels(position_df\$annotation)), levels(position_df\$annotation), sep="_"))
351
352 pixel_image = ggplot(position_df, aes(x=x, y=y, fill=annotation))+
353 geom_tile(height = 1, width=1)+
354 coord_fixed()+
355 ggtitle("Spatial orientation of filtered pixels")+
356 theme_bw()+
357 theme(plot.title = element_text(hjust = 0.5))+
358 theme(text=element_text(family="ArialMT", face="bold", size=12))+
359 theme(legend.position="bottom",legend.direction="vertical")+
360 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 6))+
361 guides(fill=guide_legend(ncol=4,byrow=TRUE))
362
363 coord_labels = aggregate(cbind(x,y)~annotation, data=position_df, mean, na.rm=TRUE, na.action="na.pass")
364 coord_labels\$file_number = 1:length(levels(position_df\$annotation))
365
366 for(file_count in 1:nrow(coord_labels))
367 {pixel_image = pixel_image + annotate("text",x=coord_labels[file_count,"x"],
368 y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))}
369
370 print(pixel_image)
371
372 ### control features which are removed
373 hist(mz(msidata), xlab="m/z", main="Kept m/z values")
374 #if str($features_cond.features_filtering) == "none":
375 print("no difference histogram as no m/z filtering took place")
376 #else:
377
378 if (isTRUE(all.equal(featuresinfile, mz(msidata)))){
379 print("No difference in m/z values before and after filtering, no histogram drawn")
380 }else{
381 hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")}
382 #end if
383
384 dev.off()
385 388
386 389
387 }else{ 390 }else{
388 print("Inputfile or filtered file has no intensities > 0") 391 print("Inputfile or filtered file has no intensities > 0")
389 dev.off() 392 dev.off()
390 } 393 }
394
391 ]]></configfile> 395 ]]></configfile>
392 </configfiles> 396 </configfiles>
393 <inputs> 397 <inputs>
394 <expand macro="reading_msidata"/> 398 <expand macro="reading_msidata"/>
395 <conditional name="pixels_cond"> 399 <conditional name="pixels_cond">
396 <param name="pixel_filtering" type="select" label="Select pixel filtering option"> 400 <param name="pixel_filtering" type="select" label="Select pixel filtering option">
397 <option value="none" selected="True">none</option> 401 <option value="none" selected="True">none</option>
398 <option value="two_columns">list of pixel coordinates (tabular file)</option> 402 <option value="two_columns">coordinates from tabular file</option>
399 <option value="pixel_range">ranges for x and y (manually)</option> 403 <option value="pixel_range">ranges for x and y (manually)</option>
400 </param> 404 </param>
401 <when value="none"/> 405 <when value="none"/>
402 <when value="two_columns"> 406 <when value="two_columns">
403 <expand macro="reading_pixel_annotations"/> 407 <param name="annotation_file" type="data" format="tabular" label="Tabular file with pixel coordinates"
404 408 help="Tabular file with two columns: x values and y values"/>
409 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
410 <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
411 <param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
405 </when> 412 </when>
406 <when value="pixel_range"> 413 <when value="pixel_range">
407 <param name="min_x_range" type="integer" value="0" label="Minimum value for x"/> 414 <param name="min_x_range" type="integer" value="0" label="Minimum value for x"/>
408 <param name="max_x_range" type="integer" value="100" label="Maximum value for x"/> 415 <param name="max_x_range" type="integer" value="100" label="Maximum value for x"/>
409 <param name="min_y_range" type="integer" value="0" label="Minimum value for y"/> 416 <param name="min_y_range" type="integer" value="0" label="Minimum value for y"/>
412 </conditional> 419 </conditional>
413 420
414 <conditional name="features_cond"> 421 <conditional name="features_cond">
415 <param name="features_filtering" type="select" label="Select m/z feature filtering option"> 422 <param name="features_filtering" type="select" label="Select m/z feature filtering option">
416 <option value="none" selected="True">none</option> 423 <option value="none" selected="True">none</option>
417 <option value="features_list">keep a list of m/z (tabular file)</option> 424 <option value="features_list">keep m/z (tabular file)</option>
418 <option value="features_range">m/z range (manually)</option> 425 <option value="features_range">m/z range (manually)</option>
419 <option value="remove_features">remove a list of m/z (tabular file)</option> 426 <option value="remove_features">remove m/z (tabular file)</option>
420 </param> 427 </param>
421 <when value="none"/> 428 <when value="none"/>
422 <when value="features_list"> 429 <when value="features_list">
423 <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to keep"/> 430 <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to keep"/>
424 </when> 431 </when>
433 <option value="ppm" selected="True">ppm</option> 440 <option value="ppm" selected="True">ppm</option>
434 <option value="Da">Da</option> 441 <option value="Da">Da</option>
435 </param> 442 </param>
436 </when> 443 </when>
437 </conditional> 444 </conditional>
438 <param name="imzml_output" type="boolean" label="Output of imzML file" truevalue="TRUE" falsevalue="FALSE"/> 445 <param name="imzml_output" type="select" display = "radio" optional = "False"
439 446 label="Output format" help= "Choose the output format">
447 <option value="imzml_format" selected="True">imzML</option>
448 <option value="rdata_format">RData</option>
449 </param>
440 </inputs> 450 </inputs>
441 451
442 <outputs> 452 <outputs>
443 <data format="rdata" name="msidata_filtered" label="${tool.name} on ${on_string}"/> 453 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML">
454 <filter>imzml_output =='imzml_format'</filter>
455 </data>
456 <data format="rdata" name="outfile_rdata" label="${tool.name} on ${on_string}: RData">
457 <filter>imzml_output == 'rdata_format'</filter>
458 </data>
444 <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "${tool.name} on ${on_string}: QC"/> 459 <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "${tool.name} on ${on_string}: QC"/>
445 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML">
446 <filter>imzml_output</filter>
447 </data>
448 </outputs> 460 </outputs>
449 <tests> 461 <tests>
450 <test> 462 <test>
451 <expand macro="infile_imzml"/> 463 <expand macro="infile_imzml"/>
452 <param name="pixel_filtering" value="pixel_range"/> 464 <param name="pixel_filtering" value="pixel_range"/>
453 <param name="min_x_range" value="1"/> 465 <param name="min_x_range" value="1"/>
454 <param name="max_x_range" value="20"/> 466 <param name="max_x_range" value="20"/>
455 <param name="min_y_range" value="2"/> 467 <param name="min_y_range" value="2"/>
456 <param name="max_y_range" value="2"/> 468 <param name="max_y_range" value="2"/>
457 <param name="features_filtering" value="features_range"/> 469 <param name="features_filtering" value="features_range"/>
458 <param name="min_mz" value="350" /> 470 <param name="min_mz" value="350"/>
459 <param name="max_mz" value="500"/> 471 <param name="max_mz" value="500"/>
472 <param name="imzml_output" value="imzml_format"/>
460 <output name="QC_overview" file="imzml_filtered3.pdf" compare="sim_size"/> 473 <output name="QC_overview" file="imzml_filtered3.pdf" compare="sim_size"/>
461 <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size"/> 474 <output name="outfile_imzml" ftype="imzml" file="out3.imzml.txt" compare="sim_size">
475 <extra_files type="file" file="out3.imzml" name="imzml" lines_diff="4"/>
476 <extra_files type="file" file="out3.ibd" name="ibd" compare="sim_size"/>
477 </output>
462 </test> 478 </test>
463 <test> 479 <test>
464 <expand macro="infile_imzml"/> 480 <expand macro="infile_imzml"/>
465 <param name="pixel_filtering" value="two_columns"/> 481 <param name="pixel_filtering" value="two_columns"/>
466 <param name="annotation_file" ftype="tabular" value = "inputpixels_2column.tabular"/> 482 <param name="annotation_file" ftype="tabular" value="inputpixels_2column.tabular"/>
467 <param name="column_x" value="1"/> 483 <param name="column_x" value="1"/>
468 <param name="column_y" value="3"/> 484 <param name="column_y" value="3"/>
469 <param name="column_names" value="2"/> 485 <param name="imzml_output" value="imzml_format"/>
470 <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size"/> 486 <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size"/>
471 <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/> 487 <output name="outfile_imzml" ftype="imzml" file="out4.imzml.txt" compare="sim_size">
472 <!--imzml output test not yet working: output name="outfile_imzml" file="filtering_imzmls/summary" compare="sim_size" delta="10000"> 488 <extra_files type="file" file="out4.imzml" name="imzml" lines_diff="4"/>
473 <extra_files type="file" name="imzml" value="filtering_imzmls/out4.imzML" compare="sim_size" delta="10000"/> 489 <extra_files type="file" file="out4.ibd" name="ibd" compare="sim_size"/>
474 <extra_files type="file" name="ibd" value="filtering_imzmls/out4.ibd" compare="sim_size" delta="10000"/> 490 </output>
475 </output-->
476 </test> 491 </test>
477 <test> 492 <test>
478 <expand macro="infile_imzml"/> 493 <expand macro="infile_imzml"/>
479 <param name="pixel_filtering" value="pixel_range"/> 494 <param name="pixel_filtering" value="pixel_range"/>
480 <param name="min_x_range" value="0"/> 495 <param name="min_x_range" value="0"/>
483 <param name="max_y_range" value="20"/> 498 <param name="max_y_range" value="20"/>
484 <param name="features_filtering" value="features_list"/> 499 <param name="features_filtering" value="features_list"/>
485 <param name="mz_tabular" ftype="tabular" value = "featuresofinterest5.tabular"/> 500 <param name="mz_tabular" ftype="tabular" value = "featuresofinterest5.tabular"/>
486 <param name="feature_column" value="1"/> 501 <param name="feature_column" value="1"/>
487 <param name="feature_header" value="0"/> 502 <param name="feature_header" value="0"/>
503 <param name="imzml_output" value="imzml_format"/>
488 <output name="QC_overview" file="imzml_filtered5.pdf" compare="sim_size"/> 504 <output name="QC_overview" file="imzml_filtered5.pdf" compare="sim_size"/>
489 <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" /> 505 <output name="outfile_imzml" ftype="imzml" file="out5.imzml.txt" compare="sim_size">
506 <extra_files type="file" file="out5.imzml" name="imzml" lines_diff="4"/>
507 <extra_files type="file" file="out5.ibd" name="ibd" compare="sim_size"/>
508 </output>
490 </test> 509 </test>
491 <test> 510 <test>
492 <expand macro="infile_analyze75"/> 511 <expand macro="infile_analyze75"/>
512 <param name="imzml_output" value="imzml_format"/>
493 <output name="QC_overview" file="analyze75_filtered2.pdf" compare="sim_size"/> 513 <output name="QC_overview" file="analyze75_filtered2.pdf" compare="sim_size"/>
494 <output name="msidata_filtered" file="analyze_filteredoutside.RData" compare="sim_size" /> 514 <output name="outfile_imzml" ftype="imzml" file="out6.imzml.txt" compare="sim_size">
515 <extra_files type="file" file="out6.imzml" name="imzml" lines_diff="4"/>
516 <extra_files type="file" file="out6.ibd" name="ibd" compare="sim_size"/>
517 </output>
495 </test> 518 </test>
496 <test> 519 <test>
497 <param name="infile" value="preprocessed.RData" ftype="rdata"/> 520 <param name="infile" value="preprocessed.RData" ftype="rdata"/>
498 <conditional name="outputs"> 521 <conditional name="outputs">
499 <param name="outputs_select" value="no_quality_control"/> 522 <param name="outputs_select" value="no_quality_control"/>
500 </conditional> 523 </conditional>
501 <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size"/> 524 <param name="imzml_output" value="imzml_format"/>
502 <output name="QC_overview" file="rdata_notfiltered.pdf" compare="sim_size" /> 525 <output name="QC_overview" file="rdata_notfiltered.pdf" compare="sim_size" />
526 <output name="outfile_imzml" ftype="imzml" file="out7.imzml.txt" compare="sim_size">
527 <extra_files type="file" file="out7.imzml" name="imzml" lines_diff="4"/>
528 <extra_files type="file" file="out7.ibd" name="ibd" compare="sim_size"/>
529 </output>
503 </test> 530 </test>
504 </tests> 531 </tests>
505 <help> 532 <help>
506 <![CDATA[ 533 <![CDATA[
507 534
511 538
512 This tool provides options to filter (subset) pixels and m/z features of mass spectrometry imaging data. 539 This tool provides options to filter (subset) pixels and m/z features of mass spectrometry imaging data.
513 540
514 @MSIDATA_INPUT_DESCRIPTION@ 541 @MSIDATA_INPUT_DESCRIPTION@
515 542
516 @SPECTRA_TABULAR_INPUT_DESCRIPTION@ 543 - Optional file with pixel coordinates and annotation:
544
545 - Tabular file: One column with x values, one column with y values
546 - The file is allowed to have any column names as header (in this case set "Tabular file contains a header line" to yes)
547 - Pixels with coordinates outside the coordinates of the input file are ignored
548
549 ::
550
551 x_coord y_coord
552 1 1
553 2 1
554 3 1
555 ...
556 ...
517 557
518 @MZ_TABULAR_INPUT_DESCRIPTION@ 558 @MZ_TABULAR_INPUT_DESCRIPTION@
519 559
520 **Options** 560 **Options**
521 561
522 - pixel filtering/annotation: either with a tabular file containing x and y coordinates and pixel annotations or by defining a range for x and y by hand (for the latter no annotation is possible). Pixel that are not present in the dataset are ignored. It is not possible to filter only for pixels that are not present in the dataset. 562 - Pixel filtering/annotation:
523 - m/z feature filtering: m/z values for filtering should be either imported as a tabular file containing containing m/z of interest or by defining a range for the m/z values. m/z that are not present in the dataset are ignored. It is not possible to filter only for m/z that are not present in the dataset. 563
524 - m/z feature removing: perturbing m/z features such as matrix contaminants can be removed by specifying their m/z in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed. 564 - Either with a tabular file containing x and y coordinates or by entering x-min, x-max, y-min, y-max manually
565 - Pixels that are not present in the dataset are ignored
566 - An error occurs if the input for filtering (tabular file, x-range or y-range) does not contain a single coordinate that occurs in the input dataset
567
568
569 - m/z feature filtering:
570
571 - Either with a tabular file containing m/z values or by entering m/z-min and m/z-max manually
572 - m/z values that are not present in the dataset are ignored
573 - An error occurs if the input for filtering (tabular file or mz-range) does not contain a single m/z feature that occurs in the dataset
574
575
576 - m/z feature removing:
577
578 - Perturbing m/z features such as matrix contaminants can be removed by specifying their m/z value in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed
525 579
526 580
527 **Tips** 581 **Tips**
528 582
529 - Numeric m/z features imported via a tabular file and m/z features of the dataset are rounded to 4 decimal points (or maximum number of decimal points of input m/z) and then matched. Therefore, it is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If the m/z values are from a different dataset, the tool "Join two files on column allowing a small difference" should be used to find corresponding m/z values, which can then be used for filtering. 583 - m/z feature filtering with a tabular file:
530 - In case tabular file cannot be selected in drop-down menu: Datatype in Galaxy must be tabular otherwise file will not appear in selection window (if Galaxy auto-detection was wrong, datatype can be changed by pressing the pen button (edit attributes)) 584
585 - For matching, the m/z features of the input dataset are rounded to the number of decimal places of the m/z values from the tabular file. If the input has more than 4 decimal places, the m/z values of both the dataset and the tabular file are rounded to 4 decimal places.
586 - Therefore, it is recommended to use the filtering tool only for m/z features which have been extracted from the same dataset. If the m/z values are from a different dataset, the tool 'Join two files on column allowing a small difference' should be used to find corresponding m/z values, which can then be used for filtering.
587
588 - Problems selecting the tabular file:
589
590 - In case tabular file cannot be selected in drop-down menu: Datatype in Galaxy might be different from 'tabular' - datatype can be changed by pressing the pen button of the dataset (edit attributes)
531 591
532 592
533 **Output** 593 **Output**
534 594
535 - MSI data as .RData output (can be read with the Cardinal package in R) 595 - MSI data as imzML file or .RData (can be read with the Cardinal package in R)
536 - optional: MSI data as imzML file
537 - pdf with heatmap showing the pixels that are left after filtering and histograms of kept and removed m/z 596 - pdf with heatmap showing the pixels that are left after filtering and histograms of kept and removed m/z
538 597
539 598
540 ]]> 599 ]]>
541 </help> 600 </help>