comparison msi_filtering.xml @ 3:d51c3c814d57 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_filtering commit a7be47698f53eb4f00961192327d93e8989276a7
author galaxyp
date Mon, 11 Jun 2018 17:33:40 -0400
parents 22db5eb94e50
children bf61fc662615
comparison
equal deleted inserted replaced
2:22db5eb94e50 3:d51c3c814d57
1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.0"> 1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.1">
2 <description>tool for filtering mass spectrometry imaging data</description> 2 <description>tool for filtering mass spectrometry imaging data</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> 4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
5 <requirement type="package" version="2.2.1">r-gridextra</requirement> 5 <requirement type="package" version="2.2.1">r-gridextra</requirement>
6 </requirements> 6 </requirements>
39 msidata = readAnalyze('infile') 39 msidata = readAnalyze('infile')
40 #else 40 #else
41 load('infile.RData') 41 load('infile.RData')
42 #end if 42 #end if
43 43
44 ##################################### QC: inputfile properties in numbers ###### 44 ########################### optional QC numbers ########################
45 45
46 #if $outputs.outputs_select == "quality_control": 46 #if $outputs.outputs_select == "quality_control":
47 ## Number of features (mz) 47
48 ## Number of features (m/z)
48 maxfeatures = length(features(msidata)) 49 maxfeatures = length(features(msidata))
49 ## Range mz 50 ## Range m/z
50 minmz = round(min(mz(msidata)), digits=2) 51 minmz = round(min(mz(msidata)), digits=2)
51 maxmz = round(max(mz(msidata)), digits=2) 52 maxmz = round(max(mz(msidata)), digits=2)
52 ## Number of spectra (pixels) 53 ## Number of spectra (pixels)
53 pixelcount = length(pixels(msidata)) 54 pixelcount = length(pixels(msidata))
54 ## Range x coordinates 55 ## Range x coordinates
57 ## Range y coordinates 58 ## Range y coordinates
58 minimumy = min(coord(msidata)[,2]) 59 minimumy = min(coord(msidata)[,2])
59 maximumy = max(coord(msidata)[,2]) 60 maximumy = max(coord(msidata)[,2])
60 ## Number of intensities > 0 61 ## Number of intensities > 0
61 npeaks= sum(spectra(msidata)[]>0) 62 npeaks= sum(spectra(msidata)[]>0)
62 ## Spectra multiplied with mz (potential number of peaks) 63 ## Spectra multiplied with m/z (potential number of peaks)
63 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) 64 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
64 ## Percentage of intensities > 0 65 ## Percentage of intensities > 0
65 percpeaks = round(npeaks/numpeaks*100, digits=2) 66 percpeaks = round(npeaks/numpeaks*100, digits=2)
66 ## Number of empty TICs 67 ## Number of empty TICs
67 TICs = colSums(spectra(msidata)[]) 68 TICs = colSums(spectra(msidata)[])
68 NumemptyTIC = sum(TICs == 0) 69 NumemptyTIC = sum(TICs == 0)
69 ## median TIC 70 ## median TIC
70 medint = round(median(TICs), digits=2) 71 medint = round(median(TICs), digits=2)
71 ## Store features for QC plot 72 ## Store features for QC plot
72 featuresinfile = mz(msidata) 73 featuresinfile = mz(msidata)
74
73 #end if 75 #end if
74 76
75
76 ###################################### Filtering of pixels ##################### 77 ###################################### Filtering of pixels #####################
77 78 ################################################################################
78 ### Pixels in the one column format "x=,y=" 79
80 #################### Pixels in the one column format "x=,y=" #####################
79 81
80 #if str($pixels_cond.pixel_filtering) == "single_column": 82 #if str($pixels_cond.pixel_filtering) == "single_column":
81 print("single column") 83 print("single column")
82 84
83 input_list = read.delim("$pixels_cond.single_pixels", header = FALSE, stringsAsFactors = FALSE) 85 input_list = read.delim("$pixels_cond.single_pixels", header = FALSE, stringsAsFactors = FALSE)
84 numberpixels = length(input_list[,$pixels_cond.pixel_column]) 86 numberpixels = length(input_list[,$pixels_cond.pixel_column])
85 valid_entries = input_list[,$pixels_cond.pixel_column] %in% names(pixels(msidata)) 87 valid_entries = input_list[,$pixels_cond.pixel_column] %in% names(pixels(msidata))
86 validpixels = sum(valid_entries) 88 validpixels = sum(valid_entries)
87 89
88 if (validpixels != 0) 90 if (validpixels != 0){
89 { 91 pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[valid_entries,$pixels_cond.pixel_column]]
90 pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[valid_entries,$pixels_cond.pixel_column]] 92 msidata = msidata[,pixelsofinterest]
91 msidata = msidata[,pixelsofinterest] 93 }else{
92 }else{ 94 msidata = msidata[,0]
93 msidata = msidata[,0] 95 validpixels=0}
94 validpixels=0 96
95 } 97 ############ Pixels in two columns format: x and y in different columns #############
96
97
98 ### Pixels in two columns format: x and y in different columns
99 98
100 #elif str($pixels_cond.pixel_filtering) == "two_columns": 99 #elif str($pixels_cond.pixel_filtering) == "two_columns":
101 print("two columns") 100 print("two columns")
102 101
103 input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, 102 input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE,
104 stringsAsFactors = FALSE) 103 stringsAsFactors = FALSE)
105 numberpixels = length(input_list[,$pixels_cond.pixel_column_x]) 104 numberpixels = length(input_list[,$pixels_cond.pixel_column_x])
106 105
107 inputpixel_x = input_list[,$pixels_cond.pixel_column_x] 106 inputpixel_x = input_list[,$pixels_cond.pixel_column_x]
108 inputpixel_y = input_list[,$pixels_cond.pixel_column_y] 107 inputpixel_y = input_list[,$pixels_cond.pixel_column_y]
109 108 inputpixels = cbind(inputpixel_x, inputpixel_y)
110 inputpixels = cbind(inputpixel_x, inputpixel_y) 109 colnames(inputpixels) = c("x", "y")
111 colnames(inputpixels) = c("x", "y") 110 valid_rows = merge(inputpixels, coord(msidata)[,1:2])
112 valid_rows = merge(inputpixels, coord(msidata)[,1:2]) 111 validpixels = nrow(valid_rows)
113 validpixels = nrow(valid_rows) 112
114 113 if (validpixels != 0){
115 if (validpixels != 0) 114 pixelvector = character()
116 { 115 for (pixel in 1:nrow(valid_rows)){
117 116 pixelvector[pixel] = paste0("x = ", valid_rows[pixel,1],", ", "y = ", valid_rows[pixel,2])}
118 pixelvector = character() 117 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector]
119 118 msidata = msidata[,pixelsofinterest]
120 for (pixel in 1:nrow(valid_rows)) 119 }else{
121 { 120 validpixels=0}
122 pixelvector[pixel] = paste0("x = ", valid_rows[pixel,1],", ", "y = ", valid_rows[pixel,2]) 121
123 } 122 ########### Pixels within x and y minima and maxima are kept ###################
124
125 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector]
126 msidata = msidata[,pixelsofinterest]
127 }else{
128 validpixels=0
129 }
130
131
132 ### Pixels within x and y minima and maxima are kept:
133 123
134 #elif str($pixels_cond.pixel_filtering) == "pixel_range": 124 #elif str($pixels_cond.pixel_filtering) == "pixel_range":
135 print("pixel range") 125 print("pixel range")
136 126
137 numberpixels = "range" 127 numberpixels = "range"
138 validpixels = "range" 128 validpixels = "range"
139 129
140 ## only filter pixels if at least one pixel will be left 130 ## only filter pixels if at least one pixel will be left
141 131
142 if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0) 132 if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){
143 {
144 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] 133 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range]
145 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range] 134 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range]
146 }else{ 135 }else{
147 msidata = msidata[,0] 136 msidata = msidata[,0]
148 print("no valid pixel found") 137 print("no valid pixel found")}
149 }
150
151
152 138
153 #elif str($pixels_cond.pixel_filtering) == "none": 139 #elif str($pixels_cond.pixel_filtering) == "none":
154 print("no pixel filtering") 140 print("no pixel filtering")
141
155 numberpixels = 0 142 numberpixels = 0
156 validpixels = 0 143 validpixels = 0
157 144
158 #end if 145 #end if
159 146
160 147
161
162 ###################################### filtering of features ###################### 148 ###################################### filtering of features ######################
163 149 ##################################################################################
164 ### Tabular file contains mz either as numbers or in the format mz=800.01 150
151 ######################## Keep m/z from tabular file #########################
165 152
166 #if str($features_cond.features_filtering) == "features_list": 153 #if str($features_cond.features_filtering) == "features_list":
167 print("feature list") 154 print("feature list")
168 155
169 input_features = read.delim("$inputfeatures", header = FALSE, stringsAsFactors = FALSE) 156 input_features = read.delim("$inputfeatures", header = FALSE, stringsAsFactors = FALSE)
170
171 startingrow = $features_cond.feature_header+1 157 startingrow = $features_cond.feature_header+1
172 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.feature_column] 158 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.feature_column]
173 numberfeatures = length(extracted_features) 159 numberfeatures = length(extracted_features)
174 160
175 if (grepl("m/z = ", input_features[startingrow,$features_cond.feature_column])==FALSE) 161 if (grepl("m/z = ", input_features[startingrow,$features_cond.feature_column])==FALSE){
176 162
177 ### if input is in numeric format 163 ### if input is in numeric format
178 { 164 if (class(extracted_features) == "numeric"){
179 165 ### max digits given in the input file will be used to match m/z
180 if (class(extracted_features) == "numeric") 166 max_digits = max(nchar(matrix(unlist(strsplit(as.character(extracted_features), "\\.")), ncol=2, byrow=TRUE)[,2]))
181 { 167 validfeatures = extracted_features %in% round(mz(msidata),max_digits)
182 charactervector = rep("m/z = ", numberfeatures) 168 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% extracted_features[validfeatures]]
183 mz_added = paste0(charactervector, round(extracted_features,digits=2)) 169 validmz = length(unique(featuresofinterest))
184 validfeatures = mz_added %in% names(features(msidata))
185 featuresofinterest = features(msidata)[names(features(msidata)) %in% mz_added[validfeatures]]
186 validmz = sum(validfeatures)
187 }else{ 170 }else{
188 validmz = 0 171 validmz = 0
189 featuresofinterest = 0 172 featuresofinterest = 0}
190 }
191 173
192 ### if input is already in character format (m/z = 800.01) 174 ### if input is already in character format (m/z = 800.01)
193 175
194 }else{ 176 }else{
195 validfeatures = extracted_features %in% names(features(msidata)) 177 validfeatures = extracted_features %in% names(features(msidata))
196 featuresofinterest = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]] 178 featuresofinterest = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]]
197 validmz = sum(validfeatures) 179 validmz = sum(validfeatures)}
198 }
199 180
200 ### filter msidata for valid features 181 ### filter msidata for valid features
182
201 msidata = msidata[featuresofinterest,] 183 msidata = msidata[featuresofinterest,]
202 184
203 185 ############### features within a given range are kept #########################
204 ### Only features within a given minimum and maximum value are kept:
205 186
206 #elif str($features_cond.features_filtering) == "features_range": 187 #elif str($features_cond.features_filtering) == "features_range":
207 print("feature range") 188 print("feature range")
208 189
209 numberfeatures = "range" 190 numberfeatures = "range"
210 validmz = "range" 191 validmz = "range"
211 192
212 if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0) 193 if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){
213 { 194 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,]
214 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,]
215 }else{ 195 }else{
216 msidata = msidata[0,] 196 msidata = msidata[0,]
217 print("no valid mz range") 197 print("no valid mz range")}
198
199 ############### Remove m/z from tabular file #########################
200
201 #elif str($features_cond.features_filtering) == "remove_features":
202 print("remove features")
203
204 ### Tabular file contains mz either as numbers or in the format mz = 800.01
205
206 input_features = read.delim("$inputfeatures_removal", header = FALSE, stringsAsFactors = FALSE)
207 startingrow = $features_cond.removal_header+1
208 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.removal_column]
209 numberfeatures = length(extracted_features)
210
211 if (grepl("m/z = ", input_features[startingrow,$features_cond.removal_column])==TRUE){
212
213 ### if input is mz = 800 character format
214 print("input is in format mz = 400")
215 validfeatures = extracted_features %in% names(features(msidata))
216 validmz = sum(validfeatures)
217 filtered_features = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]]
218 featuresofinterest = mz(msidata)[filtered_features]
219
220 ### if input is numeric:
221 }else{
222 if (class(extracted_features) == "numeric"){
223 print("input is numeric")
224 featuresofinterest = extracted_features
225 validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata)))
226 }else{featuresofinterest = 0
227 validmz = 0}
218 } 228 }
229
230 ### Here starts removal of features:
231
232 plusminus = $features_cond.removal_plusminus
233
234 mass_to_remove = numeric()
235 if (sum(featuresofinterest) > 0){
236 for (masses in featuresofinterest){
237 #if str($features_cond.units_removal) == "ppm":
238 plusminus = masses * $features_cond.removal_plusminus/1000000
239 #end if
240 current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus))
241 mass_to_remove = append(mass_to_remove, current_mass)}
242 msidata= msidata[-mass_to_remove, ]
243 }else{print("No features were removed as they were not fitting to m/z values and/or range")}
219 244
220 245
221 #elif str($features_cond.features_filtering) == "none": 246 #elif str($features_cond.features_filtering) == "none":
222 247
223 print("no feature filtering") 248 print("no feature filtering")
224 validmz = 0 249 validmz = 0
225 numberfeatures = 0 250 numberfeatures = 0
251
226 #end if 252 #end if
227 253
228 254 ## save msidata as Rfile
229
230 # save msidata as Rfile
231 save(msidata, file="$msidata_filtered") 255 save(msidata, file="$msidata_filtered")
232 256
233 ###################################### outputfile properties in numbers ######## 257 #################### optional QC numbers #######################
234 258
235 #if $outputs.outputs_select == "quality_control": 259 #if $outputs.outputs_select == "quality_control":
236 260
237 ## Number of features (mz) 261 ## Number of features (m/z)
238 maxfeatures2 = length(features(msidata)) 262 maxfeatures2 = length(features(msidata))
239 ## Range mz 263 ## Range m/z
240 minmz2 = round(min(mz(msidata)), digits=2) 264 minmz2 = round(min(mz(msidata)), digits=2)
241 maxmz2 = round(max(mz(msidata)), digits=2) 265 maxmz2 = round(max(mz(msidata)), digits=2)
242 ## Number of spectra (pixels) 266 ## Number of spectra (pixels)
243 pixelcount2 = length(pixels(msidata)) 267 pixelcount2 = length(pixels(msidata))
244 ## Range x coordinates 268 ## Range x coordinates
245 minimumx2 = min(coord(msidata)[,1]) 269 minimumx2 = min(coord(msidata)[,1])
246 maximumx2 = max(coord(msidata)[,1]) 270 maximumx2 = max(coord(msidata)[,1])
247 ## Range y coordinates 271 ## Range y coordinates
248 minimumy2 = min(coord(msidata)[,2]) 272 minimumy2 = min(coord(msidata)[,2])
249 maximumy2 = max(coord(msidata)[,2]) 273 maximumy2 = max(coord(msidata)[,2])
250 ## Number of intensities > 0 274 ## Number of intensities > 0
251 npeaks2= sum(spectra(msidata)[]>0) 275 npeaks2= sum(spectra(msidata)[]>0)
252 ## Spectra multiplied with mz (potential number of peaks) 276 ## Spectra multiplied with m/z (potential number of peaks)
253 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) 277 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
254 ## Percentage of intensities > 0 278 ## Percentage of intensities > 0
255 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) 279 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2)
256 ## Number of empty TICs 280 ## Number of empty TICs
257 TICs2 = colSums(spectra(msidata)[]) 281 TICs2 = colSums(spectra(msidata)[])
258 NumemptyTIC2 = sum(TICs2 == 0) 282 NumemptyTIC2 = sum(TICs2 == 0)
259 ## median TIC 283 ## median TIC
260 medint2 = round(median(TICs2), digits=2) 284 medint2 = round(median(TICs2), digits=2)
261 285
262 286 properties = c("Number of m/z features",
263 properties = c("Number of mz features", 287 "Range of m/z values [Da]",
264 "Range of mz values [Da]", 288 "Number of pixels",
265 "Number of pixels", 289 "Range of x coordinates",
266 "Range of x coordinates", 290 "Range of y coordinates",
267 "Range of y coordinates", 291 "Intensities > 0",
268 "Intensities > 0", 292 "Median TIC per pixel",
269 "Median TIC per pixel", 293 "Number of zero TICs",
270 "Number of zero TICs", 294 "pixel overview",
271 "pixel overview", 295 "feature overview")
272 "feature overview") 296
273 297 before = c(paste0(maxfeatures),
274 before = c(paste0(maxfeatures), 298 paste0(minmz, " - ", maxmz),
275 paste0(minmz, " - ", maxmz), 299 paste0(pixelcount),
276 paste0(pixelcount), 300 paste0(minimumx, " - ", maximumx),
277 paste0(minimumx, " - ", maximumx), 301 paste0(minimumy, " - ", maximumy),
278 paste0(minimumy, " - ", maximumy), 302 paste0(percpeaks, " %"),
279 paste0(percpeaks, " %"), 303 paste0(medint),
280 paste0(medint), 304 paste0(NumemptyTIC),
281 paste0(NumemptyTIC), 305 paste0("input pixels: ", numberpixels),
282 paste0("input pixels: ", numberpixels), 306 paste0("input mz: ", numberfeatures))
283 paste0("input mz: ", numberfeatures)) 307
284 308 filtered = c(paste0(maxfeatures2),
285 filtered = c(paste0(maxfeatures2), 309 paste0(minmz2, " - ", maxmz2),
286 paste0(minmz2, " - ", maxmz2), 310 paste0(pixelcount2),
287 paste0(pixelcount2), 311 paste0(minimumx2, " - ", maximumx2),
288 paste0(minimumx2, " - ", maximumx2), 312 paste0(minimumy2, " - ", maximumy2),
289 paste0(minimumy2, " - ", maximumy2), 313 paste0(percpeaks2, " %"),
290 paste0(percpeaks2, " %"), 314 paste0(medint2),
291 paste0(medint2), 315 paste0(NumemptyTIC2),
292 paste0(NumemptyTIC2), 316 paste0("valid pixels: ", validpixels),
293 paste0("valid pixels: ", validpixels), 317 paste0("valid mz: ", validmz))
294 paste0("valid mz: ", validmz)) 318
295 319 property_df = data.frame(properties, before, filtered)
296 320
297 property_df = data.frame(properties, before, filtered) 321 ############################### optional PDF QC ################################
298
299
300
301 ######################################## PDF QC ################################
302 322
303 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) 323 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12)
304 plot(0,type='n',axes=FALSE,ann=FALSE) 324 plot(0,type='n',axes=FALSE,ann=FALSE)
305
306 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) 325 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name"))
307
308
309
310 grid.table(property_df, rows= NULL) 326 grid.table(property_df, rows= NULL)
311 327
312 ### heatmap image as visual pixel control 328 ### heatmap image as visual pixel control
313 329 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){
314 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0) 330 image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none",
315 {
316
317
318 image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none",
319 main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2)) 331 main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2))
320 332
321 ### control features which are left 333 ### control features which are left
322 334 plot(featuresinfile, rep(1,length(featuresinfile)), yaxt="n", ylab=NA, xlab="m/z values", col="red", ylim=c(0.8, 1.1), main="Distribution of m/z values")
323 plot(featuresinfile, rep(1,length(featuresinfile)), yaxt="n", ylab=NA, xlab="m/z values", col="red", ylim=c(0.8, 1.1), main="Distribution of m/z values") 335 lines(mz(msidata),rep(0.9, length(mz(msidata))), col="green", type="p")
324 lines(mz(msidata),rep(0.9, length(mz(msidata))), col="green", type="p") 336 legend("top", horiz=TRUE, legend = c("before", "filtered"), fill = c("red", "green"))
325 legend("top", horiz=TRUE, 337 }else{
326 legend = c("before", "filtered"), 338 print("file has no features or pixels left")}
327 fill = c("red", "green"))
328
329 }else{
330 print("file has no features or pixels left")
331 }
332 339
333 dev.off() 340 dev.off()
334 341
335 #end if 342 #end if
336 343
337 ######################################## intensity matrix ###################### 344 ############################### optional intensity matrix ######################
338 345
339 #if $output_matrix: 346 #if $output_matrix:
340 347
341 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0) 348 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){
342 {
343
344 spectramatrix = spectra(msidata) 349 spectramatrix = spectra(msidata)
345 rownames(spectramatrix) = mz(msidata) 350 rownames(spectramatrix) = mz(msidata)
346 newmatrix = rbind(pixels(msidata), spectramatrix) 351 newmatrix = rbind(pixels(msidata), spectramatrix)
347 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 352 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
348
349 }else{ 353 }else{
350 print("file has no features or pixels left") 354 print("file has no features or pixels left")}
351 }
352 355
353 #end if 356 #end if
354 357
355 358
356 ]]></configfile> 359 ]]></configfile>
357 </configfiles> 360 </configfiles>
358 <inputs> 361 <inputs>
359 <param name="infile" type="data" format="imzml, rdata, analyze75" 362 <param name="infile" type="data" format="imzml,rdata,analyze75"
360 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" 363 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
361 help="Upload composite datatype imzML (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> 364 help="Upload composite datatype imzML (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
362 <conditional name="pixels_cond"> 365 <conditional name="pixels_cond">
363 <param name="pixel_filtering" type="select" label="Select pixel filtering option"> 366 <param name="pixel_filtering" type="select" label="Select pixel filtering option">
364 <option value="none" selected="True">none</option> 367 <option value="none" selected="True">none</option>
386 </when> 389 </when>
387 </conditional> 390 </conditional>
388 <conditional name="features_cond"> 391 <conditional name="features_cond">
389 <param name="features_filtering" type="select" label="Select feature filtering option"> 392 <param name="features_filtering" type="select" label="Select feature filtering option">
390 <option value="none" selected="True">none</option> 393 <option value="none" selected="True">none</option>
391 <option value="features_list">tabular file with features (data type: 800.12 or m/z = 800.12)</option> 394 <option value="features_list">keep features (tabular input)</option>
392 <option value="features_range">range of features</option> 395 <option value="features_range">keep features within a range (manual input)</option>
396 <option value="remove_features">remove features (tabular input)</option>
393 </param> 397 </param>
394 <when value="none"/> 398 <when value="none"/>
395 <when value="features_list"> 399 <when value="features_list">
396 <param name="inputfeatures" type="data" format="tabular" label="Features for filtering of MSI data" help="tabular file with masses of interest either as numbers (800.05) or in the form m/z = 800.05"/> 400 <param name="inputfeatures" type="data" format="tabular" label="Features for filtering of MSI data" help="tabular file with m/z of interest either as numbers (800.05) or in the form m/z = 800.05"/>
397 <param name="feature_column" data_ref="inputfeatures" label="Column with features" type="data_column"/> 401 <param name="feature_column" data_ref="inputfeatures" label="Column with features" type="data_column"/>
398 <param name="feature_header" label="Number of header lines to skip" value="0" type="integer"/> 402 <param name="feature_header" label="Number of header lines to skip" value="0" type="integer"/>
399 </when> 403 </when>
400 <when value="features_range"> 404 <when value="features_range">
401 <param name="min_mz" type="float" value="1" label="Minimum value for mz (in Dalton)"/> 405 <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/>
402 <param name="max_mz" type="float" value="100" label="Maximum value for mz (in Dalton)"/> 406 <param name="max_mz" type="float" value="100" label="Maximum value for m/z"/>
407 </when>
408 <when value="remove_features">
409 <param name="inputfeatures_removal" type="data" format="tabular" label="Features for filtering of MSI data" help="tabular file with m/z to be removed either as numbers (800.05) or in the form m/z = 800.05"/>
410 <param name="removal_column" data_ref="inputfeatures_removal" label="Column with features" type="data_column"/>
411 <param name="removal_header" label="Number of header lines to skip" value="0" type="integer"/>
412 <param name="removal_plusminus" type="float" value="20" label="Window in which m/z will be removed" help="This value will be added and substracted from the given input value"/>
413 <param name="units_removal" type="select" display = "radio" optional = "False" label="units">
414 <option value="ppm" selected="True">ppm</option>
415 <option value="Da">Da</option>
416 </param>
403 </when> 417 </when>
404 </conditional> 418 </conditional>
405 <conditional name="outputs"> 419 <conditional name="outputs">
406 <param name="outputs_select" type="select" label="Quality control output"> 420 <param name="outputs_select" type="select" label="Quality control output">
407 <option value="quality_control" selected="True">yes</option> 421 <option value="quality_control" selected="True">yes</option>
408 <option value="no_quality_control">no</option> 422 <option value="no_quality_control">no</option>
409 </param> 423 </param>
410 <when value="quality_control"> 424 <when value="quality_control">
411 <param name="inputmz" type="float" value="1296.7" label="Mass for which a heatmap image will be drawn" help="Use a mass which is still present in all pixels to control if the pixel filtering went well"/> 425 <param name="inputmz" type="float" value="1296.7" label="M/z for which a heatmap image will be drawn" help="Use a m/z which is still present in all pixels to control if the pixel filtering went well"/>
412 <param name="plusminus_dalton" value="0.25" type="float" label="mass range for mz value" help="plusminus mass window in Dalton"/> 426 <param name="plusminus_dalton" value="0.25" type="float" label="Range for m/z value" help="plusminus m/z window"/>
413 </when> 427 </when>
414 <when value="no_quality_control"/> 428 <when value="no_quality_control"/>
415 </conditional> 429 </conditional>
416 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> 430 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/>
417 </inputs> 431 </inputs>
418 <outputs> 432 <outputs>
419 <data format="rdata" name="msidata_filtered" label="${tool.name} ${on_string}"/> 433 <data format="rdata" name="msidata_filtered" label="$infile.display_name filtered"/>
420 <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "QC ${tool.name} ${on_string}"> 434 <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC">
421 <filter>outputs["outputs_select"] == "quality_control"</filter> 435 <filter>outputs["outputs_select"] == "quality_control"</filter>
422 </data> 436 </data>
423 <data format="tabular" name="matrixasoutput" label="Matrix ${tool.name} ${on_string}"> 437 <data format="tabular" name="matrixasoutput" label="$infile.display_name filtered_matrix">
424 <filter>output_matrix</filter> 438 <filter>output_matrix</filter>
425 </data> 439 </data>
426 </outputs> 440 </outputs>
427 <tests> 441 <tests>
428 <test expect_num_outputs="2"> 442 <test expect_num_outputs="2">
468 <param name="min_x_range" value="1"/> 482 <param name="min_x_range" value="1"/>
469 <param name="max_x_range" value="20"/> 483 <param name="max_x_range" value="20"/>
470 <param name="min_y_range" value="2"/> 484 <param name="min_y_range" value="2"/>
471 <param name="max_y_range" value="2"/> 485 <param name="max_y_range" value="2"/>
472 <param name="features_filtering" value="features_range"/> 486 <param name="features_filtering" value="features_range"/>
473 <param name="min_mz" value="0" /> 487 <param name="min_mz" value="350" />
474 <param name="max_mz" value="500"/> 488 <param name="max_mz" value="500"/>
475 <param name="outputs_select" value="quality_control"/> 489 <param name="outputs_select" value="quality_control"/>
476 <param name="inputmz" value="328.9"/> 490 <param name="inputmz" value="328.9"/>
477 <param name="plusminus_dalton" value="0.25"/> 491 <param name="plusminus_dalton" value="0.25"/>
478 <param name="output_matrix" value="True"/> 492 <param name="output_matrix" value="True"/>
507 <param name="pixel_filtering" value="pixel_range"/> 521 <param name="pixel_filtering" value="pixel_range"/>
508 <param name="min_x_range" value="0"/> 522 <param name="min_x_range" value="0"/>
509 <param name="max_x_range" value="10"/> 523 <param name="max_x_range" value="10"/>
510 <param name="min_y_range" value="2"/> 524 <param name="min_y_range" value="2"/>
511 <param name="max_y_range" value="20"/> 525 <param name="max_y_range" value="20"/>
512 <param name="features_filtering" value="features_range"/> 526 <param name="features_filtering" value="features_list"/>
513 <param name="min_mz" value="500" /> 527 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest5.tabular"/>
514 <param name="max_mz" value="700"/> 528 <param name="feature_column" value="1"/>
529 <param name="feature_header" value="0"/>
515 <param name="outputs_select" value="quality_control"/> 530 <param name="outputs_select" value="quality_control"/>
516 <param name="inputmz" value="328.9"/> 531 <param name="inputmz" value="328.9"/>
517 <param name="plusminus_dalton" value="0.25"/> 532 <param name="plusminus_dalton" value="0.25"/>
518 <output name="filtering_qc" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/> 533 <output name="filtering_qc" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/>
519 <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" /> 534 <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" />
567 <help> 582 <help>
568 <![CDATA[ 583 <![CDATA[
569 584
570 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ 585 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_
571 586
572 This tool provides options to filter (subset) pixels and masses of mass-spectrometry imaging data. 587 This tool provides options to filter (subset) pixels and m/z features of mass spectrometry imaging data.
573 588
574 Input data: 3 types of input data can be used: 589 Input data: 3 types of input data can be used:
575 590
576 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ 591 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
577 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) 592 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
578 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) 593 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
579 594
595
580 Options: 596 Options:
581 597
582 - pixel filtering: can use a tabular file containing x and y coordinates or by defining a range for x and y by hand 598 - pixel filtering: can use a tabular file containing x and y coordinates or by defining a range for x and y by hand
583 - mass filtering: can use a tabular file containing masses of interest or by defining a range for the mass values 599 - m/z feature filtering: can use a tabular file containing m/z of interest or by defining a range for the m/z values (! numeric input will be rounded to 2 digits before matching to m/z!)
600 - m/z feature removing: interfering m/z such as matrix contaminants can be removed by specifying their m/z in a tabular file and optionally setting a window (in ppm or Da) in which peaks should be removed
601
584 602
585 Output: 603 Output:
586 604
587 - imzML file filtered for pixels and/or masses 605 - imzML file filtered for pixels and/or m/z
588 - optional: pdf with heatmap showing the pixels that are left after filtering and plot of masses before and after filtering 606 - optional: pdf with heatmap showing the pixels that are left after filtering and plot of m/z before and after filtering
589 - optional: intensity matrix as tabular file (intensities for masses in rows and pixel in columns) 607 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns)
608
590 609
591 Tip: 610 Tip:
592 611
593 - It is recommended to use the filtering tool only for masses which have been extracted from the same dataset. If you have masses from dataset A and you want to use them to filter dataset B, first find the corresponding (closest) features in dataset B by using the tool "Join two files on column allowing a small difference". Afterwards use the corresponding feature masses from dataset A to filter dataset B. 612 - It is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If you have m/z from dataset A and you want to use them to filter dataset B, first find the corresponding (closest) features in dataset B by using the tool "Join two files on column allowing a small difference". Afterwards use the corresponding feature m/z from dataset A to filter dataset B.
594 613
595 614
596 ]]> 615 ]]>
597 </help> 616 </help>
598 <citations> 617 <citations>