Mercurial > repos > galaxyp > msi_filtering
comparison msi_filtering.xml @ 3:d51c3c814d57 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_filtering commit a7be47698f53eb4f00961192327d93e8989276a7
author | galaxyp |
---|---|
date | Mon, 11 Jun 2018 17:33:40 -0400 |
parents | 22db5eb94e50 |
children | bf61fc662615 |
comparison
equal
deleted
inserted
replaced
2:22db5eb94e50 | 3:d51c3c814d57 |
---|---|
1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.0"> | 1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.1"> |
2 <description>tool for filtering mass spectrometry imaging data</description> | 2 <description>tool for filtering mass spectrometry imaging data</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> | 4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> |
5 <requirement type="package" version="2.2.1">r-gridextra</requirement> | 5 <requirement type="package" version="2.2.1">r-gridextra</requirement> |
6 </requirements> | 6 </requirements> |
39 msidata = readAnalyze('infile') | 39 msidata = readAnalyze('infile') |
40 #else | 40 #else |
41 load('infile.RData') | 41 load('infile.RData') |
42 #end if | 42 #end if |
43 | 43 |
44 ##################################### QC: inputfile properties in numbers ###### | 44 ########################### optional QC numbers ######################## |
45 | 45 |
46 #if $outputs.outputs_select == "quality_control": | 46 #if $outputs.outputs_select == "quality_control": |
47 ## Number of features (mz) | 47 |
48 ## Number of features (m/z) | |
48 maxfeatures = length(features(msidata)) | 49 maxfeatures = length(features(msidata)) |
49 ## Range mz | 50 ## Range m/z |
50 minmz = round(min(mz(msidata)), digits=2) | 51 minmz = round(min(mz(msidata)), digits=2) |
51 maxmz = round(max(mz(msidata)), digits=2) | 52 maxmz = round(max(mz(msidata)), digits=2) |
52 ## Number of spectra (pixels) | 53 ## Number of spectra (pixels) |
53 pixelcount = length(pixels(msidata)) | 54 pixelcount = length(pixels(msidata)) |
54 ## Range x coordinates | 55 ## Range x coordinates |
57 ## Range y coordinates | 58 ## Range y coordinates |
58 minimumy = min(coord(msidata)[,2]) | 59 minimumy = min(coord(msidata)[,2]) |
59 maximumy = max(coord(msidata)[,2]) | 60 maximumy = max(coord(msidata)[,2]) |
60 ## Number of intensities > 0 | 61 ## Number of intensities > 0 |
61 npeaks= sum(spectra(msidata)[]>0) | 62 npeaks= sum(spectra(msidata)[]>0) |
62 ## Spectra multiplied with mz (potential number of peaks) | 63 ## Spectra multiplied with m/z (potential number of peaks) |
63 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | 64 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) |
64 ## Percentage of intensities > 0 | 65 ## Percentage of intensities > 0 |
65 percpeaks = round(npeaks/numpeaks*100, digits=2) | 66 percpeaks = round(npeaks/numpeaks*100, digits=2) |
66 ## Number of empty TICs | 67 ## Number of empty TICs |
67 TICs = colSums(spectra(msidata)[]) | 68 TICs = colSums(spectra(msidata)[]) |
68 NumemptyTIC = sum(TICs == 0) | 69 NumemptyTIC = sum(TICs == 0) |
69 ## median TIC | 70 ## median TIC |
70 medint = round(median(TICs), digits=2) | 71 medint = round(median(TICs), digits=2) |
71 ## Store features for QC plot | 72 ## Store features for QC plot |
72 featuresinfile = mz(msidata) | 73 featuresinfile = mz(msidata) |
74 | |
73 #end if | 75 #end if |
74 | 76 |
75 | |
76 ###################################### Filtering of pixels ##################### | 77 ###################################### Filtering of pixels ##################### |
77 | 78 ################################################################################ |
78 ### Pixels in the one column format "x=,y=" | 79 |
80 #################### Pixels in the one column format "x=,y=" ##################### | |
79 | 81 |
80 #if str($pixels_cond.pixel_filtering) == "single_column": | 82 #if str($pixels_cond.pixel_filtering) == "single_column": |
81 print("single column") | 83 print("single column") |
82 | 84 |
83 input_list = read.delim("$pixels_cond.single_pixels", header = FALSE, stringsAsFactors = FALSE) | 85 input_list = read.delim("$pixels_cond.single_pixels", header = FALSE, stringsAsFactors = FALSE) |
84 numberpixels = length(input_list[,$pixels_cond.pixel_column]) | 86 numberpixels = length(input_list[,$pixels_cond.pixel_column]) |
85 valid_entries = input_list[,$pixels_cond.pixel_column] %in% names(pixels(msidata)) | 87 valid_entries = input_list[,$pixels_cond.pixel_column] %in% names(pixels(msidata)) |
86 validpixels = sum(valid_entries) | 88 validpixels = sum(valid_entries) |
87 | 89 |
88 if (validpixels != 0) | 90 if (validpixels != 0){ |
89 { | 91 pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[valid_entries,$pixels_cond.pixel_column]] |
90 pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[valid_entries,$pixels_cond.pixel_column]] | 92 msidata = msidata[,pixelsofinterest] |
91 msidata = msidata[,pixelsofinterest] | 93 }else{ |
92 }else{ | 94 msidata = msidata[,0] |
93 msidata = msidata[,0] | 95 validpixels=0} |
94 validpixels=0 | 96 |
95 } | 97 ############ Pixels in two columns format: x and y in different columns ############# |
96 | |
97 | |
98 ### Pixels in two columns format: x and y in different columns | |
99 | 98 |
100 #elif str($pixels_cond.pixel_filtering) == "two_columns": | 99 #elif str($pixels_cond.pixel_filtering) == "two_columns": |
101 print("two columns") | 100 print("two columns") |
102 | 101 |
103 input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, | 102 input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, |
104 stringsAsFactors = FALSE) | 103 stringsAsFactors = FALSE) |
105 numberpixels = length(input_list[,$pixels_cond.pixel_column_x]) | 104 numberpixels = length(input_list[,$pixels_cond.pixel_column_x]) |
106 | 105 |
107 inputpixel_x = input_list[,$pixels_cond.pixel_column_x] | 106 inputpixel_x = input_list[,$pixels_cond.pixel_column_x] |
108 inputpixel_y = input_list[,$pixels_cond.pixel_column_y] | 107 inputpixel_y = input_list[,$pixels_cond.pixel_column_y] |
109 | 108 inputpixels = cbind(inputpixel_x, inputpixel_y) |
110 inputpixels = cbind(inputpixel_x, inputpixel_y) | 109 colnames(inputpixels) = c("x", "y") |
111 colnames(inputpixels) = c("x", "y") | 110 valid_rows = merge(inputpixels, coord(msidata)[,1:2]) |
112 valid_rows = merge(inputpixels, coord(msidata)[,1:2]) | 111 validpixels = nrow(valid_rows) |
113 validpixels = nrow(valid_rows) | 112 |
114 | 113 if (validpixels != 0){ |
115 if (validpixels != 0) | 114 pixelvector = character() |
116 { | 115 for (pixel in 1:nrow(valid_rows)){ |
117 | 116 pixelvector[pixel] = paste0("x = ", valid_rows[pixel,1],", ", "y = ", valid_rows[pixel,2])} |
118 pixelvector = character() | 117 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] |
119 | 118 msidata = msidata[,pixelsofinterest] |
120 for (pixel in 1:nrow(valid_rows)) | 119 }else{ |
121 { | 120 validpixels=0} |
122 pixelvector[pixel] = paste0("x = ", valid_rows[pixel,1],", ", "y = ", valid_rows[pixel,2]) | 121 |
123 } | 122 ########### Pixels within x and y minima and maxima are kept ################### |
124 | |
125 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] | |
126 msidata = msidata[,pixelsofinterest] | |
127 }else{ | |
128 validpixels=0 | |
129 } | |
130 | |
131 | |
132 ### Pixels within x and y minima and maxima are kept: | |
133 | 123 |
134 #elif str($pixels_cond.pixel_filtering) == "pixel_range": | 124 #elif str($pixels_cond.pixel_filtering) == "pixel_range": |
135 print("pixel range") | 125 print("pixel range") |
136 | 126 |
137 numberpixels = "range" | 127 numberpixels = "range" |
138 validpixels = "range" | 128 validpixels = "range" |
139 | 129 |
140 ## only filter pixels if at least one pixel will be left | 130 ## only filter pixels if at least one pixel will be left |
141 | 131 |
142 if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0) | 132 if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){ |
143 { | |
144 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] | 133 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] |
145 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range] | 134 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range] |
146 }else{ | 135 }else{ |
147 msidata = msidata[,0] | 136 msidata = msidata[,0] |
148 print("no valid pixel found") | 137 print("no valid pixel found")} |
149 } | |
150 | |
151 | |
152 | 138 |
153 #elif str($pixels_cond.pixel_filtering) == "none": | 139 #elif str($pixels_cond.pixel_filtering) == "none": |
154 print("no pixel filtering") | 140 print("no pixel filtering") |
141 | |
155 numberpixels = 0 | 142 numberpixels = 0 |
156 validpixels = 0 | 143 validpixels = 0 |
157 | 144 |
158 #end if | 145 #end if |
159 | 146 |
160 | 147 |
161 | |
162 ###################################### filtering of features ###################### | 148 ###################################### filtering of features ###################### |
163 | 149 ################################################################################## |
164 ### Tabular file contains mz either as numbers or in the format mz=800.01 | 150 |
151 ######################## Keep m/z from tabular file ######################### | |
165 | 152 |
166 #if str($features_cond.features_filtering) == "features_list": | 153 #if str($features_cond.features_filtering) == "features_list": |
167 print("feature list") | 154 print("feature list") |
168 | 155 |
169 input_features = read.delim("$inputfeatures", header = FALSE, stringsAsFactors = FALSE) | 156 input_features = read.delim("$inputfeatures", header = FALSE, stringsAsFactors = FALSE) |
170 | |
171 startingrow = $features_cond.feature_header+1 | 157 startingrow = $features_cond.feature_header+1 |
172 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.feature_column] | 158 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.feature_column] |
173 numberfeatures = length(extracted_features) | 159 numberfeatures = length(extracted_features) |
174 | 160 |
175 if (grepl("m/z = ", input_features[startingrow,$features_cond.feature_column])==FALSE) | 161 if (grepl("m/z = ", input_features[startingrow,$features_cond.feature_column])==FALSE){ |
176 | 162 |
177 ### if input is in numeric format | 163 ### if input is in numeric format |
178 { | 164 if (class(extracted_features) == "numeric"){ |
179 | 165 ### max digits given in the input file will be used to match m/z |
180 if (class(extracted_features) == "numeric") | 166 max_digits = max(nchar(matrix(unlist(strsplit(as.character(extracted_features), "\\.")), ncol=2, byrow=TRUE)[,2])) |
181 { | 167 validfeatures = extracted_features %in% round(mz(msidata),max_digits) |
182 charactervector = rep("m/z = ", numberfeatures) | 168 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% extracted_features[validfeatures]] |
183 mz_added = paste0(charactervector, round(extracted_features,digits=2)) | 169 validmz = length(unique(featuresofinterest)) |
184 validfeatures = mz_added %in% names(features(msidata)) | |
185 featuresofinterest = features(msidata)[names(features(msidata)) %in% mz_added[validfeatures]] | |
186 validmz = sum(validfeatures) | |
187 }else{ | 170 }else{ |
188 validmz = 0 | 171 validmz = 0 |
189 featuresofinterest = 0 | 172 featuresofinterest = 0} |
190 } | |
191 | 173 |
192 ### if input is already in character format (m/z = 800.01) | 174 ### if input is already in character format (m/z = 800.01) |
193 | 175 |
194 }else{ | 176 }else{ |
195 validfeatures = extracted_features %in% names(features(msidata)) | 177 validfeatures = extracted_features %in% names(features(msidata)) |
196 featuresofinterest = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]] | 178 featuresofinterest = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]] |
197 validmz = sum(validfeatures) | 179 validmz = sum(validfeatures)} |
198 } | |
199 | 180 |
200 ### filter msidata for valid features | 181 ### filter msidata for valid features |
182 | |
201 msidata = msidata[featuresofinterest,] | 183 msidata = msidata[featuresofinterest,] |
202 | 184 |
203 | 185 ############### features within a given range are kept ######################### |
204 ### Only features within a given minimum and maximum value are kept: | |
205 | 186 |
206 #elif str($features_cond.features_filtering) == "features_range": | 187 #elif str($features_cond.features_filtering) == "features_range": |
207 print("feature range") | 188 print("feature range") |
208 | 189 |
209 numberfeatures = "range" | 190 numberfeatures = "range" |
210 validmz = "range" | 191 validmz = "range" |
211 | 192 |
212 if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0) | 193 if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){ |
213 { | 194 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,] |
214 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,] | |
215 }else{ | 195 }else{ |
216 msidata = msidata[0,] | 196 msidata = msidata[0,] |
217 print("no valid mz range") | 197 print("no valid mz range")} |
198 | |
199 ############### Remove m/z from tabular file ######################### | |
200 | |
201 #elif str($features_cond.features_filtering) == "remove_features": | |
202 print("remove features") | |
203 | |
204 ### Tabular file contains m/z either as numbers or in the format m/z = 800.01 | |
205 | |
206 input_features = read.delim("$inputfeatures_removal", header = FALSE, stringsAsFactors = FALSE) | |
207 startingrow = $features_cond.removal_header+1 | |
208 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.removal_column] | |
209 numberfeatures = length(extracted_features) | |
210 | |
211 if (grepl("m/z = ", input_features[startingrow,$features_cond.removal_column])==TRUE){ | |
212 | |
213 ### if input is in the character format m/z = 800 | |
214 print("input is in format mz = 400") | |
215 validfeatures = extracted_features %in% names(features(msidata)) | |
216 validmz = sum(validfeatures) | |
217 filtered_features = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]] | |
218 featuresofinterest = mz(msidata)[filtered_features] | |
219 | |
220 ### if input is numeric: | |
221 }else{ | |
222 if (class(extracted_features) == "numeric"){ | |
223 print("input is numeric") | |
224 featuresofinterest = extracted_features | |
225 validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata))) | |
226 }else{featuresofinterest = 0 | |
227 validmz = 0} | |
218 } | 228 } |
229 | |
230 ### Here starts removal of features: | |
231 | |
232 plusminus = $features_cond.removal_plusminus | |
233 | |
234 mass_to_remove = numeric() | |
235 if (sum(featuresofinterest) > 0){ | |
236 for (masses in featuresofinterest){ | |
237 #if str($features_cond.units_removal) == "ppm": | |
238 plusminus = masses * $features_cond.removal_plusminus/1000000 | |
239 #end if | |
240 current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus)) | |
241 mass_to_remove = append(mass_to_remove, current_mass)} | |
242 msidata= msidata[-mass_to_remove, ] | |
243 }else{print("No features were removed as they were not fitting to m/z values and/or range")} | |
219 | 244 |
220 | 245 |
221 #elif str($features_cond.features_filtering) == "none": | 246 #elif str($features_cond.features_filtering) == "none": |
222 | 247 |
223 print("no feature filtering") | 248 print("no feature filtering") |
224 validmz = 0 | 249 validmz = 0 |
225 numberfeatures = 0 | 250 numberfeatures = 0 |
251 | |
226 #end if | 252 #end if |
227 | 253 |
228 | 254 ## save msidata as Rfile |
229 | |
230 # save msidata as Rfile | |
231 save(msidata, file="$msidata_filtered") | 255 save(msidata, file="$msidata_filtered") |
232 | 256 |
233 ###################################### outputfile properties in numbers ######## | 257 #################### optional QC numbers ####################### |
234 | 258 |
235 #if $outputs.outputs_select == "quality_control": | 259 #if $outputs.outputs_select == "quality_control": |
236 | 260 |
237 ## Number of features (mz) | 261 ## Number of features (m/z) |
238 maxfeatures2 = length(features(msidata)) | 262 maxfeatures2 = length(features(msidata)) |
239 ## Range mz | 263 ## Range m/z |
240 minmz2 = round(min(mz(msidata)), digits=2) | 264 minmz2 = round(min(mz(msidata)), digits=2) |
241 maxmz2 = round(max(mz(msidata)), digits=2) | 265 maxmz2 = round(max(mz(msidata)), digits=2) |
242 ## Number of spectra (pixels) | 266 ## Number of spectra (pixels) |
243 pixelcount2 = length(pixels(msidata)) | 267 pixelcount2 = length(pixels(msidata)) |
244 ## Range x coordinates | 268 ## Range x coordinates |
245 minimumx2 = min(coord(msidata)[,1]) | 269 minimumx2 = min(coord(msidata)[,1]) |
246 maximumx2 = max(coord(msidata)[,1]) | 270 maximumx2 = max(coord(msidata)[,1]) |
247 ## Range y coordinates | 271 ## Range y coordinates |
248 minimumy2 = min(coord(msidata)[,2]) | 272 minimumy2 = min(coord(msidata)[,2]) |
249 maximumy2 = max(coord(msidata)[,2]) | 273 maximumy2 = max(coord(msidata)[,2]) |
250 ## Number of intensities > 0 | 274 ## Number of intensities > 0 |
251 npeaks2= sum(spectra(msidata)[]>0) | 275 npeaks2= sum(spectra(msidata)[]>0) |
252 ## Spectra multiplied with mz (potential number of peaks) | 276 ## Spectra multiplied with m/z (potential number of peaks) |
253 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | 277 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) |
254 ## Percentage of intensities > 0 | 278 ## Percentage of intensities > 0 |
255 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) | 279 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) |
256 ## Number of empty TICs | 280 ## Number of empty TICs |
257 TICs2 = colSums(spectra(msidata)[]) | 281 TICs2 = colSums(spectra(msidata)[]) |
258 NumemptyTIC2 = sum(TICs2 == 0) | 282 NumemptyTIC2 = sum(TICs2 == 0) |
259 ## median TIC | 283 ## median TIC |
260 medint2 = round(median(TICs2), digits=2) | 284 medint2 = round(median(TICs2), digits=2) |
261 | 285 |
262 | 286 properties = c("Number of m/z features", |
263 properties = c("Number of mz features", | 287 "Range of m/z values [Da]", |
264 "Range of mz values [Da]", | 288 "Number of pixels", |
265 "Number of pixels", | 289 "Range of x coordinates", |
266 "Range of x coordinates", | 290 "Range of y coordinates", |
267 "Range of y coordinates", | 291 "Intensities > 0", |
268 "Intensities > 0", | 292 "Median TIC per pixel", |
269 "Median TIC per pixel", | 293 "Number of zero TICs", |
270 "Number of zero TICs", | 294 "pixel overview", |
271 "pixel overview", | 295 "feature overview") |
272 "feature overview") | 296 |
273 | 297 before = c(paste0(maxfeatures), |
274 before = c(paste0(maxfeatures), | 298 paste0(minmz, " - ", maxmz), |
275 paste0(minmz, " - ", maxmz), | 299 paste0(pixelcount), |
276 paste0(pixelcount), | 300 paste0(minimumx, " - ", maximumx), |
277 paste0(minimumx, " - ", maximumx), | 301 paste0(minimumy, " - ", maximumy), |
278 paste0(minimumy, " - ", maximumy), | 302 paste0(percpeaks, " %"), |
279 paste0(percpeaks, " %"), | 303 paste0(medint), |
280 paste0(medint), | 304 paste0(NumemptyTIC), |
281 paste0(NumemptyTIC), | 305 paste0("input pixels: ", numberpixels), |
282 paste0("input pixels: ", numberpixels), | 306 paste0("input mz: ", numberfeatures)) |
283 paste0("input mz: ", numberfeatures)) | 307 |
284 | 308 filtered = c(paste0(maxfeatures2), |
285 filtered = c(paste0(maxfeatures2), | 309 paste0(minmz2, " - ", maxmz2), |
286 paste0(minmz2, " - ", maxmz2), | 310 paste0(pixelcount2), |
287 paste0(pixelcount2), | 311 paste0(minimumx2, " - ", maximumx2), |
288 paste0(minimumx2, " - ", maximumx2), | 312 paste0(minimumy2, " - ", maximumy2), |
289 paste0(minimumy2, " - ", maximumy2), | 313 paste0(percpeaks2, " %"), |
290 paste0(percpeaks2, " %"), | 314 paste0(medint2), |
291 paste0(medint2), | 315 paste0(NumemptyTIC2), |
292 paste0(NumemptyTIC2), | 316 paste0("valid pixels: ", validpixels), |
293 paste0("valid pixels: ", validpixels), | 317 paste0("valid mz: ", validmz)) |
294 paste0("valid mz: ", validmz)) | 318 |
295 | 319 property_df = data.frame(properties, before, filtered) |
296 | 320 |
297 property_df = data.frame(properties, before, filtered) | 321 ############################### optional PDF QC ################################ |
298 | |
299 | |
300 | |
301 ######################################## PDF QC ################################ | |
302 | 322 |
303 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) | 323 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) |
304 plot(0,type='n',axes=FALSE,ann=FALSE) | 324 plot(0,type='n',axes=FALSE,ann=FALSE) |
305 | |
306 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) | 325 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) |
307 | |
308 | |
309 | |
310 grid.table(property_df, rows= NULL) | 326 grid.table(property_df, rows= NULL) |
311 | 327 |
312 ### heatmap image as visual pixel control | 328 ### heatmap image as visual pixel control |
313 | 329 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ |
314 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0) | 330 image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none", |
315 { | |
316 | |
317 | |
318 image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none", | |
319 main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2)) | 331 main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2)) |
320 | 332 |
321 ### control features which are left | 333 ### control features which are left |
322 | 334 plot(featuresinfile, rep(1,length(featuresinfile)), yaxt="n", ylab=NA, xlab="m/z values", col="red", ylim=c(0.8, 1.1), main="Distribution of m/z values") |
323 plot(featuresinfile, rep(1,length(featuresinfile)), yaxt="n", ylab=NA, xlab="m/z values", col="red", ylim=c(0.8, 1.1), main="Distribution of m/z values") | 335 lines(mz(msidata),rep(0.9, length(mz(msidata))), col="green", type="p") |
324 lines(mz(msidata),rep(0.9, length(mz(msidata))), col="green", type="p") | 336 legend("top", horiz=TRUE, legend = c("before", "filtered"), fill = c("red", "green")) |
325 legend("top", horiz=TRUE, | 337 }else{ |
326 legend = c("before", "filtered"), | 338 print("file has no features or pixels left")} |
327 fill = c("red", "green")) | |
328 | |
329 }else{ | |
330 print("file has no features or pixels left") | |
331 } | |
332 | 339 |
333 dev.off() | 340 dev.off() |
334 | 341 |
335 #end if | 342 #end if |
336 | 343 |
337 ######################################## intensity matrix ###################### | 344 ############################### optional intensity matrix ###################### |
338 | 345 |
339 #if $output_matrix: | 346 #if $output_matrix: |
340 | 347 |
341 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0) | 348 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ |
342 { | |
343 | |
344 spectramatrix = spectra(msidata) | 349 spectramatrix = spectra(msidata) |
345 rownames(spectramatrix) = mz(msidata) | 350 rownames(spectramatrix) = mz(msidata) |
346 newmatrix = rbind(pixels(msidata), spectramatrix) | 351 newmatrix = rbind(pixels(msidata), spectramatrix) |
347 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | 352 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") |
348 | |
349 }else{ | 353 }else{ |
350 print("file has no features or pixels left") | 354 print("file has no features or pixels left")} |
351 } | |
352 | 355 |
353 #end if | 356 #end if |
354 | 357 |
355 | 358 |
356 ]]></configfile> | 359 ]]></configfile> |
357 </configfiles> | 360 </configfiles> |
358 <inputs> | 361 <inputs> |
359 <param name="infile" type="data" format="imzml, rdata, analyze75" | 362 <param name="infile" type="data" format="imzml,rdata,analyze75" |
360 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" | 363 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" |
361 help="Upload composite datatype imzML (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> | 364 help="Upload composite datatype imzML (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> |
362 <conditional name="pixels_cond"> | 365 <conditional name="pixels_cond"> |
363 <param name="pixel_filtering" type="select" label="Select pixel filtering option"> | 366 <param name="pixel_filtering" type="select" label="Select pixel filtering option"> |
364 <option value="none" selected="True">none</option> | 367 <option value="none" selected="True">none</option> |
386 </when> | 389 </when> |
387 </conditional> | 390 </conditional> |
388 <conditional name="features_cond"> | 391 <conditional name="features_cond"> |
389 <param name="features_filtering" type="select" label="Select feature filtering option"> | 392 <param name="features_filtering" type="select" label="Select feature filtering option"> |
390 <option value="none" selected="True">none</option> | 393 <option value="none" selected="True">none</option> |
391 <option value="features_list">tabular file with features (data type: 800.12 or m/z = 800.12)</option> | 394 <option value="features_list">keep features (tabular input)</option> |
392 <option value="features_range">range of features</option> | 395 <option value="features_range">keep features within a range (manual input)</option> |
396 <option value="remove_features">remove features (tabular input)</option> | |
393 </param> | 397 </param> |
394 <when value="none"/> | 398 <when value="none"/> |
395 <when value="features_list"> | 399 <when value="features_list"> |
396 <param name="inputfeatures" type="data" format="tabular" label="Features for filtering of MSI data" help="tabular file with masses of interest either as numbers (800.05) or in the form m/z = 800.05"/> | 400 <param name="inputfeatures" type="data" format="tabular" label="Features for filtering of MSI data" help="tabular file with m/z of interest either as numbers (800.05) or in the form m/z = 800.05"/> |
397 <param name="feature_column" data_ref="inputfeatures" label="Column with features" type="data_column"/> | 401 <param name="feature_column" data_ref="inputfeatures" label="Column with features" type="data_column"/> |
398 <param name="feature_header" label="Number of header lines to skip" value="0" type="integer"/> | 402 <param name="feature_header" label="Number of header lines to skip" value="0" type="integer"/> |
399 </when> | 403 </when> |
400 <when value="features_range"> | 404 <when value="features_range"> |
401 <param name="min_mz" type="float" value="1" label="Minimum value for mz (in Dalton)"/> | 405 <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/> |
402 <param name="max_mz" type="float" value="100" label="Maximum value for mz (in Dalton)"/> | 406 <param name="max_mz" type="float" value="100" label="Maximum value for m/z"/> |
407 </when> | |
408 <when value="remove_features"> | |
409 <param name="inputfeatures_removal" type="data" format="tabular" label="Features for filtering of MSI data" help="tabular file with m/z to be removed either as numbers (800.05) or in the form m/z = 800.05"/> | |
410 <param name="removal_column" data_ref="inputfeatures_removal" label="Column with features" type="data_column"/> | |
411 <param name="removal_header" label="Number of header lines to skip" value="0" type="integer"/> | |
412 <param name="removal_plusminus" type="float" value="20" label="Window in which m/z will be removed" help="This value will be added and substracted from the given input value"/> | |
413 <param name="units_removal" type="select" display = "radio" optional = "False" label="units"> | |
414 <option value="ppm" selected="True">ppm</option> | |
415 <option value="Da">Da</option> | |
416 </param> | |
403 </when> | 417 </when> |
404 </conditional> | 418 </conditional> |
405 <conditional name="outputs"> | 419 <conditional name="outputs"> |
406 <param name="outputs_select" type="select" label="Quality control output"> | 420 <param name="outputs_select" type="select" label="Quality control output"> |
407 <option value="quality_control" selected="True">yes</option> | 421 <option value="quality_control" selected="True">yes</option> |
408 <option value="no_quality_control">no</option> | 422 <option value="no_quality_control">no</option> |
409 </param> | 423 </param> |
410 <when value="quality_control"> | 424 <when value="quality_control"> |
411 <param name="inputmz" type="float" value="1296.7" label="Mass for which a heatmap image will be drawn" help="Use a mass which is still present in all pixels to control if the pixel filtering went well"/> | 425 <param name="inputmz" type="float" value="1296.7" label="M/z for which a heatmap image will be drawn" help="Use a m/z which is still present in all pixels to control if the pixel filtering went well"/> |
412 <param name="plusminus_dalton" value="0.25" type="float" label="mass range for mz value" help="plusminus mass window in Dalton"/> | 426 <param name="plusminus_dalton" value="0.25" type="float" label="Range for m/z value" help="plusminus m/z window"/> |
413 </when> | 427 </when> |
414 <when value="no_quality_control"/> | 428 <when value="no_quality_control"/> |
415 </conditional> | 429 </conditional> |
416 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> | 430 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> |
417 </inputs> | 431 </inputs> |
418 <outputs> | 432 <outputs> |
419 <data format="rdata" name="msidata_filtered" label="${tool.name} ${on_string}"/> | 433 <data format="rdata" name="msidata_filtered" label="$infile.display_name filtered"/> |
420 <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "QC ${tool.name} ${on_string}"> | 434 <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"> |
421 <filter>outputs["outputs_select"] == "quality_control"</filter> | 435 <filter>outputs["outputs_select"] == "quality_control"</filter> |
422 </data> | 436 </data> |
423 <data format="tabular" name="matrixasoutput" label="Matrix ${tool.name} ${on_string}"> | 437 <data format="tabular" name="matrixasoutput" label="$infile.display_name filtered_matrix"> |
424 <filter>output_matrix</filter> | 438 <filter>output_matrix</filter> |
425 </data> | 439 </data> |
426 </outputs> | 440 </outputs> |
427 <tests> | 441 <tests> |
428 <test expect_num_outputs="2"> | 442 <test expect_num_outputs="2"> |
468 <param name="min_x_range" value="1"/> | 482 <param name="min_x_range" value="1"/> |
469 <param name="max_x_range" value="20"/> | 483 <param name="max_x_range" value="20"/> |
470 <param name="min_y_range" value="2"/> | 484 <param name="min_y_range" value="2"/> |
471 <param name="max_y_range" value="2"/> | 485 <param name="max_y_range" value="2"/> |
472 <param name="features_filtering" value="features_range"/> | 486 <param name="features_filtering" value="features_range"/> |
473 <param name="min_mz" value="0" /> | 487 <param name="min_mz" value="350" /> |
474 <param name="max_mz" value="500"/> | 488 <param name="max_mz" value="500"/> |
475 <param name="outputs_select" value="quality_control"/> | 489 <param name="outputs_select" value="quality_control"/> |
476 <param name="inputmz" value="328.9"/> | 490 <param name="inputmz" value="328.9"/> |
477 <param name="plusminus_dalton" value="0.25"/> | 491 <param name="plusminus_dalton" value="0.25"/> |
478 <param name="output_matrix" value="True"/> | 492 <param name="output_matrix" value="True"/> |
507 <param name="pixel_filtering" value="pixel_range"/> | 521 <param name="pixel_filtering" value="pixel_range"/> |
508 <param name="min_x_range" value="0"/> | 522 <param name="min_x_range" value="0"/> |
509 <param name="max_x_range" value="10"/> | 523 <param name="max_x_range" value="10"/> |
510 <param name="min_y_range" value="2"/> | 524 <param name="min_y_range" value="2"/> |
511 <param name="max_y_range" value="20"/> | 525 <param name="max_y_range" value="20"/> |
512 <param name="features_filtering" value="features_range"/> | 526 <param name="features_filtering" value="features_list"/> |
513 <param name="min_mz" value="500" /> | 527 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest5.tabular"/> |
514 <param name="max_mz" value="700"/> | 528 <param name="feature_column" value="1"/> |
529 <param name="feature_header" value="0"/> | |
515 <param name="outputs_select" value="quality_control"/> | 530 <param name="outputs_select" value="quality_control"/> |
516 <param name="inputmz" value="328.9"/> | 531 <param name="inputmz" value="328.9"/> |
517 <param name="plusminus_dalton" value="0.25"/> | 532 <param name="plusminus_dalton" value="0.25"/> |
518 <output name="filtering_qc" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/> | 533 <output name="filtering_qc" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/> |
519 <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" /> | 534 <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" /> |
567 <help> | 582 <help> |
568 <![CDATA[ | 583 <![CDATA[ |
569 | 584 |
570 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ | 585 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ |
571 | 586 |
572 This tool provides provides options to filter (subset) pixels and masses of mass-spectrometry imaging data. | 587 This tool provides options to filter (subset) pixels and m/z features of mass spectrometry imaging data. |
573 | 588 |
574 Input data: 3 types of input data can be used: | 589 Input data: 3 types of input data can be used: |
575 | 590 |
576 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ | 591 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ |
577 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) | 592 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) |
578 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) | 593 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) |
579 | 594 |
595 | |
580 Options: | 596 Options: |
581 | 597 |
582 - pixel filtering: can use a tabular file containing x and y coordinates or by defining a range for x and y by hand | 598 - pixel filtering: can use a tabular file containing x and y coordinates or by defining a range for x and y by hand |
583 - mass filtering: can use a tabular file containing masses of interest or by defining a range for the mass values | 599 - m/z feature filtering: can use a tabular file containing m/z of interest or by defining a range for the m/z values (! numeric input will be rounded to 2 digits before matching to m/z!) |
600 - m/z feature removing: interfering m/z such as matrix contaminants can be removed by specifying their m/z in a tabular file and optionally setting a window (in ppm or Da) in which peaks should be removed |
601 | |
584 | 602 |
585 Output: | 603 Output: |
586 | 604 |
587 - imzML file filtered for pixels and/or masses | 605 - RData file with the data filtered for pixels and/or m/z |
588 - optional: pdf with heatmap showing the pixels that are left after filtering and plot of masses before and after filtering | 606 - optional: pdf with heatmap showing the pixels that are left after filtering and plot of m/z before and after filtering |
589 - optional: intensity matrix as tabular file (intensities for masses in rows and pixel in columns) | 607 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixels in columns) |
608 | |
590 | 609 |
591 Tip: | 610 Tip: |
592 | 611 |
593 - It is recommended to use the filtering tool only for masses which have been extracted from the same dataset. If you have masses from dataset A and you want to use them to filter dataset B, first find the corresponding (closest) features in dataset B by using the tool "Join two files on column allowing a small difference". Afterwards use the corresponding feature masses from dataset A to filter dataset B. | 612 - It is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If you have m/z from dataset A and you want to use them to filter dataset B, first find the corresponding (closest) features in dataset B by using the tool "Join two files on column allowing a small difference". Afterwards use the corresponding feature m/z from dataset A to filter dataset B. |
594 | 613 |
595 | 614 |
596 ]]> | 615 ]]> |
597 </help> | 616 </help> |
598 <citations> | 617 <citations> |