Mercurial > repos > galaxyp > cardinal_filtering
comparison filtering.xml @ 2:0c4579390f73 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f127be2141cf22e269c85282d226eb16fe14a9c1
author | galaxyp |
---|---|
date | Fri, 15 Feb 2019 10:15:15 -0500 |
parents | aac805a9d2ae |
children | 58376f5a6319 |
comparison
equal
deleted
inserted
replaced
1:aac805a9d2ae | 2:0c4579390f73 |
---|---|
1 <tool id="cardinal_filtering" name="MSI filtering" version="@VERSION@.1"> | 1 <tool id="cardinal_filtering" name="MSI filtering" version="@VERSION@.2"> |
2 <description>tool for filtering mass spectrometry imaging data</description> | 2 <description>tool for filtering mass spectrometry imaging data</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements"> | 6 <expand macro="requirements"> |
13 | 13 |
14 @INPUT_LINKING@ | 14 @INPUT_LINKING@ |
15 cat '${MSI_subsetting}' && | 15 cat '${MSI_subsetting}' && |
16 Rscript '${MSI_subsetting}' && | 16 Rscript '${MSI_subsetting}' && |
17 | 17 |
18 #if $imzml_output: | 18 #if str($imzml_output) == "imzml_format": |
19 mkdir $outfile_imzml.files_path && | 19 mkdir $outfile_imzml.files_path && |
20 ls -l && | |
21 mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && | 20 mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && |
22 mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && | 21 mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && |
23 #end if | 22 #end if |
24 echo "imzML file:" > $outfile_imzml && | 23 echo "imzML file:" > $outfile_imzml && |
25 ls -l "$outfile_imzml.files_path" >> $outfile_imzml | 24 ls -l "$outfile_imzml.files_path" >> $outfile_imzml |
26 | |
27 | |
28 ]]> | 25 ]]> |
29 </command> | 26 </command> |
30 | 27 |
31 | 28 |
32 <configfiles> | 29 <configfiles> |
56 minimumx = min(coord(msidata)[,1]) | 53 minimumx = min(coord(msidata)[,1]) |
57 maximumx = max(coord(msidata)[,1]) | 54 maximumx = max(coord(msidata)[,1]) |
58 ## Range y coordinates | 55 ## Range y coordinates |
59 minimumy = min(coord(msidata)[,2]) | 56 minimumy = min(coord(msidata)[,2]) |
60 maximumy = max(coord(msidata)[,2]) | 57 maximumy = max(coord(msidata)[,2]) |
61 ## Number of intensities > 0 | |
62 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) | |
63 ## Spectra multiplied with m/z (potential number of peaks) | |
64 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | |
65 ## Percentage of intensities > 0 | |
66 percpeaks = round(npeaks/numpeaks*100, digits=2) | |
67 ## Number of empty TICs | |
68 TICs = colSums(spectra(msidata)[], na.rm=TRUE) | |
69 NumemptyTIC = sum(TICs == 0) | |
70 ## median TIC | |
71 medint = round(median(TICs), digits=2) | |
72 ## Store features for QC plot | 58 ## Store features for QC plot |
73 featuresinfile = mz(msidata) | 59 featuresinfile = mz(msidata) |
74 | 60 |
75 ## Next steps will only run if there are more than 0 intensities/pixels/features in the file | 61 ## Next steps will only run if there are more than 0 pixels/features in the file |
76 | 62 |
77 if (sum(spectra(msidata)[]>0, na.rm=TRUE) > 0) | 63 if (ncol(msidata)>0 & nrow(msidata) >0) |
78 { | 64 { |
79 | |
80 | |
81 ## prepare dataframe for QC of pixel distribution (will be overwritten in filtering of pixels condition) | |
82 position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata))) | |
83 colnames(position_df)[3] = "annotation" | |
84 | |
85 ###################################### Filtering of pixels ##################### | 65 ###################################### Filtering of pixels ##################### |
86 ################################################################################ | 66 ################################################################################ |
87 | 67 |
88 ############ Pixels in two columns format: x and y in different columns ############# | 68 ############ Pixels in two columns format: x and y in different columns ############# |
89 | 69 |
91 print("two columns") | 71 print("two columns") |
92 | 72 |
93 ## read tabular file | 73 ## read tabular file |
94 input_list = read.delim("$pixels_cond.annotation_file", header = $pixels_cond.tabular_header, | 74 input_list = read.delim("$pixels_cond.annotation_file", header = $pixels_cond.tabular_header, |
95 stringsAsFactors = FALSE) | 75 stringsAsFactors = FALSE) |
76 inputpixels = input_list[,c($pixels_cond.column_x, $pixels_cond.column_y)] | |
77 input_pixels = paste(inputpixels[,1], inputpixels[,2], sep="_") | |
78 dataset_pixels = paste(coord(msidata)\$x, coord(msidata)\$y, sep="_") | |
79 pixelsofinterest = dataset_pixels %in% input_pixels | |
80 | |
81 tryCatch( | |
82 { | |
83 msidata = msidata[,pixelsofinterest] | |
84 if (ncol(msidata) == 0) | |
85 { | |
86 stop(call.=FALSE) | |
87 } | |
88 }, | |
89 error=function(cond) { | |
90 ## in case all coordinates were outside the dataset leading to zero pixels, tool is stopped to avoid continuing with wrong data | |
91 message("Error during pixel filtering") | |
92 message("Possible problems: Forgot to set 'Tabular file contains a header line' = Yes, wrong columns selected, columns with coordinates contain empty fields or letters, all coordinates were outside the range of the dataset - this can be checked with the 'MSI qualitycontrol' tool)") | |
93 stop(call.=FALSE) | |
94 } | |
95 ) | |
96 | |
97 ## QC values: | |
96 numberpixels = nrow(input_list) | 98 numberpixels = nrow(input_list) |
97 inputpixels = input_list[,c($pixels_cond.column_x, $pixels_cond.column_y, $pixels_cond.column_names)] | |
98 | |
99 ## rewrite into x = 1, y = 1 format and filter msidata, count validpixels | |
100 pixelvector = character() | |
101 for (pixel in 1:nrow(inputpixels)){ | |
102 pixelvector[pixel] = paste0("x = ", inputpixels[pixel,1],", ", "y = ", inputpixels[pixel,2])} | |
103 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] | |
104 msidata = msidata[,pixelsofinterest] | |
105 validpixels=ncol(msidata) | 99 validpixels=ncol(msidata) |
106 | 100 |
107 ## in case some pixels are left print annotation plot | 101 ########### Pixels within x and y minima and maxima are kept ################# |
108 colnames(inputpixels) = c("x", "y", "annotation") | |
109 position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE) | |
110 colnames(position_df)[3] = "annotation" | |
111 position_df\$annotation = factor(position_df\$annotation) | |
112 | |
113 | |
114 ########### Pixels within x and y minima and maxima are kept ################### | |
115 | 102 |
116 #elif str($pixels_cond.pixel_filtering) == "pixel_range": | 103 #elif str($pixels_cond.pixel_filtering) == "pixel_range": |
117 print("pixel range") | 104 print("pixel range") |
118 | 105 |
106 ## QC values: | |
119 numberpixels = "range" | 107 numberpixels = "range" |
120 validpixels = "range" | 108 validpixels = "range" |
121 | 109 |
122 ## only filter pixels if at least one pixel will be left | 110 tryCatch( |
123 if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){ | 111 { |
124 | 112 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] |
125 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] | 113 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range] |
126 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range] | 114 if (ncol(msidata) == 0) |
127 }else{ | 115 { |
128 | 116 stop(call.=FALSE) |
129 print("no valid pixel found") | 117 } |
130 msidata = msidata[,0]} | 118 }, |
131 | 119 error=function(cond) { |
132 ## update position_df for filtered pixels | 120 ## in case one of the ranges was outside the dataset leading to zero pixels, tool is stopped to avoid continuing with wrong data |
133 position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata))) | 121 message("Error during pixel filtering") |
134 colnames(position_df)[3] = "annotation" | 122 message("Check that both x and y ranges were inside the dataset coordinates (can be checked with the 'MSI qualitycontrol' tool) or if any not numeric character was entered into the input fields") |
135 position_df\$annotation = factor(position_df\$annotation) | 123 stop(call.=FALSE) |
124 } | |
125 ) | |
126 | |
127 ######################## no pixel filtering ################################ | |
136 | 128 |
137 #elif str($pixels_cond.pixel_filtering) == "none": | 129 #elif str($pixels_cond.pixel_filtering) == "none": |
138 print("no pixel filtering") | 130 print("no pixel filtering") |
139 | 131 |
132 ## QC values: | |
140 numberpixels = 0 | 133 numberpixels = 0 |
141 validpixels = 0 | 134 validpixels = 0 |
142 | 135 |
143 #end if | 136 #end if |
137 | |
138 ############################# QC data ##################################### | |
139 | |
140 ## dataframe for QC of pixel distribution | |
141 position_df = cbind(coord(msidata)[,1:2], rep("remaining pixels", times=ncol(msidata))) | |
142 colnames(position_df)[3] = "annotation" | |
143 position_df\$annotation = factor(position_df\$annotation) | |
144 gc() | |
144 | 145 |
145 }else{ | 146 }else{ |
146 print("Inputfile has no intensities > 0") | 147 print("Inputfile has no intensities > 0") |
147 } | 148 } |
148 | 149 |
149 ################################# filtering of features ###################### | 150 ################################# filtering of features ###################### |
150 ############################################################################## | 151 ############################################################################## |
151 | 152 |
152 ####################### Keep m/z from tabular file ######################### | 153 ####################### Keep m/z from tabular file ######################### |
153 | 154 |
154 ## feature filtering only when pixels/features/intensities are left | 155 ## feature filtering only when pixels/features/intensities are left |
155 | 156 |
157 | |
156 if (ncol(msidata) > 0){ | 158 if (ncol(msidata) > 0){ |
157 npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE) | 159 if (nrow(msidata) > 0) |
158 if (npeaks_before_filtering > 0) | |
159 { | 160 { |
160 | 161 |
161 #if str($features_cond.features_filtering) == "features_list": | 162 #if str($features_cond.features_filtering) == "features_list": |
162 print("feature list") | 163 print("feature list") |
163 | 164 |
164 ## read tabular file, define starting row, extract and count valid features | 165 ## read tabular file, define starting row, extract and count valid features |
165 input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE) | 166 input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE) |
166 extracted_features = input_features[,$features_cond.feature_column] | 167 extracted_features = input_features[,$features_cond.feature_column] |
167 numberfeatures = length(extracted_features) | 168 numberfeatures = length(extracted_features) |
169 | |
168 if (class(extracted_features) == "numeric"){ | 170 if (class(extracted_features) == "numeric"){ |
169 ### max digits given in the input file will be used to match m/z but the maximum is 4 | 171 ### max digits given in the input file will be used to match m/z but the maximum is 4 |
170 max_digits = max(nchar(sapply(strsplit(as.character(extracted_features), "\\."),`[`,2)), na.rm=TRUE) | 172 max_digits = max(nchar(sapply(strsplit(as.character(extracted_features), "\\."),`[`,2)), na.rm=TRUE) |
171 | 173 |
172 if (max_digits >4) | 174 if (max_digits >4) |
175 } | 177 } |
176 | 178 |
177 validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits) | 179 validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits) |
178 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)] | 180 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)] |
179 validmz = length(unique(featuresofinterest)) | 181 validmz = length(unique(featuresofinterest)) |
182 | |
180 }else{ | 183 }else{ |
181 validmz = 0 | 184 validmz = 0 |
182 featuresofinterest = 0} | 185 featuresofinterest = 0} |
183 | 186 |
184 ### filter msidata for valid features | 187 ### filter msidata for valid features |
185 msidata = msidata[featuresofinterest,] | 188 |
189 tryCatch( | |
190 { | |
191 msidata = msidata[featuresofinterest,] | |
192 ## does not throw error when processed file has no features left, therefore create error to avoid continuing with wrong data | |
193 if (nrow(msidata) == 0) | |
194 { | |
195 stop(call.=FALSE) | |
196 } | |
197 }, | |
198 error=function(cond) { | |
199 ## in case all provided m/z values were outside the m/z range | |
200 ## tool is stopped to avoid continuing with wrong data | |
201 message("Error during m/z filtering") | |
202 message("Possible problems: Forgot to set 'Tabular file contains a header line' = Yes, wrong columns selected, column with m/z features contains empty fields or letters, all m/z features s were outside the range of the dataset - this can be checked with the 'MSI qualitycontrol' tool) or did not match any m/z feature of the dataset (see help section for more information on that)") | |
203 stop(call.=FALSE) | |
204 } | |
205 ) | |
206 | |
186 | 207 |
187 ############### features within a given range are kept ##################### | 208 ############### features within a given range are kept ##################### |
188 | 209 |
189 #elif str($features_cond.features_filtering) == "features_range": | 210 #elif str($features_cond.features_filtering) == "features_range": |
190 print("feature range") | 211 print("feature range") |
191 | 212 |
192 numberfeatures = "range" | 213 numberfeatures = "range" |
193 validmz = "range" | 214 validmz = "range" |
194 | 215 |
195 if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){ | 216 tryCatch( |
196 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,] | 217 { |
197 }else{ | 218 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,] |
198 msidata = msidata[0,] | 219 ## does not throw error when processed file has no features left, therefore create error to avoid continuing with wrong data |
199 print("no valid mz range")} | 220 if (nrow(msidata) == 0) |
221 { | |
222 stop(call.=FALSE) | |
223 } | |
224 }, | |
225 error=function(cond) { | |
226 ## in case all m/z features were outside the dataset leading to zero m/z features, tool is stopped to avoid continuing with wrong data | |
227 message("Error during m/z filtering") | |
228 message("Check that the entered m/z range is inside the dataset coordinates (can be checked with the 'MSI qualitycontrol' tool) or if any not numeric character was entered into the input fields") | |
229 stop(call.=FALSE) | |
230 } | |
231 ) | |
200 | 232 |
201 ############### Remove m/z from tabular file ######################### | 233 ############### Remove m/z from tabular file ######################### |
202 | 234 |
203 #elif str($features_cond.features_filtering) == "remove_features": | 235 #elif str($features_cond.features_filtering) == "remove_features": |
204 print("remove features") | 236 print("remove features") |
205 | 237 |
206 ## read tabular file, define starting row, extract and count valid features | 238 ## read tabular file, define starting row, extract and count valid features |
207 input_features = read.delim("$mz_tabular", header = $features_cond.removal_header, stringsAsFactors = FALSE) | 239 input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE) |
208 extracted_features = input_features[,$features_cond.removal_column] | 240 extracted_features = input_features[,$features_cond.feature_column] |
209 numberfeatures = length(extracted_features) | 241 numberfeatures = length(extracted_features) |
210 if (class(extracted_features) == "numeric"){ | 242 if (class(extracted_features) == "numeric"){ |
211 print("input is numeric") | 243 print("input is numeric") |
212 featuresofinterest = extracted_features | 244 featuresofinterest = extracted_features |
213 validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata))) | 245 }else{featuresofinterest = 0} |
214 }else{featuresofinterest = 0 | |
215 validmz = 0} | |
216 | 246 |
217 ### Here starts removal of features: | 247 ### Here starts removal of features: |
218 plusminus = $features_cond.removal_plusminus | 248 plusminus = $features_cond.removal_plusminus |
219 | 249 |
220 mass_to_remove = numeric() | 250 tryCatch( |
221 if (sum(featuresofinterest) > 0){ | 251 { |
222 for (masses in featuresofinterest){ | 252 mass_to_remove = numeric() |
223 #if str($features_cond.units_removal) == "ppm": | 253 for (masses in featuresofinterest){ |
224 plusminus = masses * $features_cond.removal_plusminus/1000000 | 254 #if str($features_cond.units_removal) == "ppm": |
225 #end if | 255 plusminus = masses * $features_cond.removal_plusminus/1000000 |
226 current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus)) | 256 #end if |
227 mass_to_remove = append(mass_to_remove, current_mass)} | 257 current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus)) |
228 msidata= msidata[-mass_to_remove, ] | 258 mass_to_remove = append(mass_to_remove, current_mass)} |
229 }else{print("No features were removed as they were not fitting to m/z values and/or range")} | 259 msidata= msidata[-mass_to_remove, ] |
230 | 260 validmz = numberfeatures - nrow(msidata) |
261 ## does not throw error when processed file has no features left, therefore create error to avoid continuing with wrong data | |
262 if (nrow(msidata) == 0) | |
263 { | |
264 stop(call.=FALSE) | |
265 } | |
266 }, | |
267 error=function(cond) { | |
268 message("Error during removal of m/z features") | |
269 stop(call.=FALSE) | |
270 } | |
271 ) | |
272 | |
273 | |
274 | |
275 | |
276 ######################## No m/z filtering ############################## | |
231 | 277 |
232 #elif str($features_cond.features_filtering) == "none": | 278 #elif str($features_cond.features_filtering) == "none": |
233 | 279 |
234 print("no feature filtering") | 280 print("no feature filtering") |
235 validmz = 0 | 281 validmz = 0 |
236 numberfeatures = 0 | 282 numberfeatures = 0 |
237 | 283 |
238 #end if | 284 #end if |
239 | 285 |
240 ## save msidata as Rfile | |
241 save(msidata, file="$msidata_filtered") | |
242 | |
243 ## Number of empty TICs | |
244 TICs2 = colSums(spectra(msidata)[], na.rm=TRUE) | |
245 ## Number of intensities > 0 | |
246 npeaks2= sum(spectra(msidata)[]>0, na.rm=TRUE) | |
247 ## Spectra multiplied with m/z (potential number of peaks) | |
248 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | |
249 | |
250 | |
251 | |
252 }else{ | 286 }else{ |
253 print("Inputfile or file filtered for pixels has no intensities > 0") | 287 print("Inputfile has no m/z features") |
254 numberfeatures = NA | 288 numberfeatures = NA |
255 validmz = NA | 289 validmz = NA |
256 ## Number of empty TICs | |
257 TICs2 = 0 | |
258 npeaks2 = 0 | |
259 numpeaks2 = 0 | |
260 } | 290 } |
261 }else{ | 291 }else{ |
262 print("Inputfile or file filtered for pixels has no pixels left") | 292 print("Inputfile or file filtered for pixels has no pixels") |
263 numberfeatures = NA | 293 numberfeatures = NA |
264 validmz = NA | 294 validmz = NA |
265 ## Number of empty TICs | |
266 TICs2 = 0 | |
267 npeaks2 = 0 | |
268 numpeaks2 = 0 | |
269 } | 295 } |
270 #################### QC numbers ####################### | 296 gc() |
271 | 297 |
272 | 298 #################### QC numbers ####################### |
273 ## Number of features (m/z) | 299 |
274 maxfeatures2 = length(features(msidata)) | 300 ## Number of features (m/z) |
275 ## Range m/z | 301 maxfeatures2 = length(features(msidata)) |
276 minmz2 = round(min(mz(msidata)), digits=2) | 302 ## Range m/z |
277 maxmz2 = round(max(mz(msidata)), digits=2) | 303 minmz2 = round(min(mz(msidata)), digits=2) |
278 ## Number of spectra (pixels) | 304 maxmz2 = round(max(mz(msidata)), digits=2) |
279 pixelcount2 = length(pixels(msidata)) | 305 ## Number of spectra (pixels) |
280 ## Range x coordinates | 306 pixelcount2 = length(pixels(msidata)) |
281 minimumx2 = min(coord(msidata)[,1]) | 307 ## Range x coordinates |
282 maximumx2 = max(coord(msidata)[,1]) | 308 minimumx2 = min(coord(msidata)[,1]) |
283 ## Range y coordinates | 309 maximumx2 = max(coord(msidata)[,1]) |
284 minimumy2 = min(coord(msidata)[,2]) | 310 ## Range y coordinates |
285 maximumy2 = max(coord(msidata)[,2]) | 311 minimumy2 = min(coord(msidata)[,2]) |
286 | 312 maximumy2 = max(coord(msidata)[,2]) |
287 ## Percentage of intensities > 0 | 313 |
288 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) | 314 properties = c("Number of m/z features", |
289 ## Number of empty TICs | 315 "Range of m/z values", |
290 NumemptyTIC2 = sum(TICs2 == 0) | 316 "Number of pixels", |
291 ## median TIC | 317 "Range of x coordinates", |
292 medint2 = round(median(TICs2), digits=2) | 318 "Range of y coordinates", |
293 | 319 "pixel overview", |
294 properties = c("Number of m/z features", | 320 "feature overview") |
295 "Range of m/z values", | 321 |
296 "Number of pixels", | 322 before = c(paste0(maxfeatures), |
297 "Range of x coordinates", | 323 paste0(minmz, " - ", maxmz), |
298 "Range of y coordinates", | 324 paste0(pixelcount), |
299 "Intensities > 0", | 325 paste0(minimumx, " - ", maximumx), |
300 "Median TIC per pixel", | 326 paste0(minimumy, " - ", maximumy), |
301 "Number of empty spectra", | 327 paste0("input pixels: ", numberpixels), |
302 "pixel overview", | 328 paste0("input mz: ", numberfeatures)) |
303 "feature overview") | 329 |
304 | 330 filtered = c(paste0(maxfeatures2), |
305 before = c(paste0(maxfeatures), | 331 paste0(minmz2, " - ", maxmz2), |
306 paste0(minmz, " - ", maxmz), | 332 paste0(pixelcount2), |
307 paste0(pixelcount), | 333 paste0(minimumx2, " - ", maximumx2), |
308 paste0(minimumx, " - ", maximumx), | 334 paste0(minimumy2, " - ", maximumy2), |
309 paste0(minimumy, " - ", maximumy), | 335 paste0("valid pixels: ", validpixels), |
310 paste0(percpeaks, " %"), | 336 paste0("valid mz: ", validmz)) |
311 paste0(medint), | 337 |
312 paste0(NumemptyTIC), | 338 property_df = data.frame(properties, before, filtered) |
313 paste0("input pixels: ", numberpixels), | 339 |
314 paste0("input mz: ", numberfeatures)) | 340 ########################### PDF QC and MSI output ########################### |
315 | 341 |
316 filtered = c(paste0(maxfeatures2), | 342 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) |
317 paste0(minmz2, " - ", maxmz2), | 343 plot(0,type='n',axes=FALSE,ann=FALSE) |
318 paste0(pixelcount2), | 344 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) |
319 paste0(minimumx2, " - ", maximumx2), | 345 grid.table(property_df, rows= NULL) |
320 paste0(minimumy2, " - ", maximumy2), | 346 |
321 paste0(percpeaks2, " %"), | 347 ## QC report only when pixels/features are left |
322 paste0(medint2), | 348 if (ncol(msidata)>0 & nrow(msidata) >0) |
323 paste0(NumemptyTIC2), | |
324 paste0("valid pixels: ", validpixels), | |
325 paste0("valid mz: ", validmz)) | |
326 | |
327 property_df = data.frame(properties, before, filtered) | |
328 print(property_df) | |
329 | |
330 ########################### PDF QC and imzml output ########################### | |
331 | |
332 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) | |
333 plot(0,type='n',axes=FALSE,ann=FALSE) | |
334 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) | |
335 grid.table(property_df, rows= NULL) | |
336 | |
337 ## QC report with more than value-table: only when pixels/features/intensities are left | |
338 if (npeaks2 > 0) | |
339 { | 349 { |
340 | 350 |
351 ### visual pixel control | |
352 | |
353 pixel_image = ggplot(position_df, aes(x=x, y=y, fill=annotation))+ | |
354 geom_tile(height = 1, width=1)+ | |
355 coord_fixed()+ | |
356 ggtitle("Spatial orientation of filtered pixels")+ | |
357 theme_bw()+ | |
358 theme(plot.title = element_text(hjust = 0.5))+ | |
359 theme(legend.position="bottom",legend.direction="vertical") | |
360 print(pixel_image) | |
361 | |
362 ### plot features which are removed | |
363 hist(mz(msidata), xlab="m/z", main="Kept m/z values") | |
364 #if str($features_cond.features_filtering) == "none": | |
365 print("no difference histogram as no m/z filtering took place") | |
366 #else: | |
367 if (isTRUE(all.equal(featuresinfile, mz(msidata)))){ | |
368 print("No difference in m/z values before and after filtering, no histogram drawn") | |
369 }else{ | |
370 hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")} | |
371 #end if | |
372 | |
373 dev.off() | |
374 | |
341 ## save msidata as imzML file, will only work if there is at least 1 m/z left | 375 ## save msidata as imzML file, will only work if there is at least 1 m/z left |
342 #if $imzml_output: | 376 |
377 #if str($imzml_output) == "imzml_format": | |
343 if (maxfeatures2 > 0){ | 378 if (maxfeatures2 > 0){ |
379 ## make sure that coordinates are integers | |
380 coord(msidata)\$y = as.integer(coord(msidata)\$y) | |
381 coord(msidata)\$x = as.integer(coord(msidata)\$x) | |
344 writeImzML(msidata, "out")} | 382 writeImzML(msidata, "out")} |
383 #elif str($imzml_output) == "rdata_format": | |
384 ## save msidata as Rfile | |
385 iData(msidata) = iData(msidata)[] | |
386 save(msidata, file="$outfile_rdata") | |
345 #end if | 387 #end if |
346 | |
347 | |
348 ### visual pixel control | |
349 | |
350 levels(position_df\$annotation) = factor(paste(1:length(levels(position_df\$annotation)), levels(position_df\$annotation), sep="_")) | |
351 | |
352 pixel_image = ggplot(position_df, aes(x=x, y=y, fill=annotation))+ | |
353 geom_tile(height = 1, width=1)+ | |
354 coord_fixed()+ | |
355 ggtitle("Spatial orientation of filtered pixels")+ | |
356 theme_bw()+ | |
357 theme(plot.title = element_text(hjust = 0.5))+ | |
358 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
359 theme(legend.position="bottom",legend.direction="vertical")+ | |
360 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 6))+ | |
361 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
362 | |
363 coord_labels = aggregate(cbind(x,y)~annotation, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
364 coord_labels\$file_number = 1:length(levels(position_df\$annotation)) | |
365 | |
366 for(file_count in 1:nrow(coord_labels)) | |
367 {pixel_image = pixel_image + annotate("text",x=coord_labels[file_count,"x"], | |
368 y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))} | |
369 | |
370 print(pixel_image) | |
371 | |
372 ### control features which are removed | |
373 hist(mz(msidata), xlab="m/z", main="Kept m/z values") | |
374 #if str($features_cond.features_filtering) == "none": | |
375 print("no difference histogram as no m/z filtering took place") | |
376 #else: | |
377 | |
378 if (isTRUE(all.equal(featuresinfile, mz(msidata)))){ | |
379 print("No difference in m/z values before and after filtering, no histogram drawn") | |
380 }else{ | |
381 hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")} | |
382 #end if | |
383 | |
384 dev.off() | |
385 | 388 |
386 | 389 |
387 }else{ | 390 }else{ |
388 print("Inputfile or filtered file has no intensities > 0") | 391 print("Inputfile or filtered file has no intensities > 0") |
389 dev.off() | 392 dev.off() |
390 } | 393 } |
394 | |
391 ]]></configfile> | 395 ]]></configfile> |
392 </configfiles> | 396 </configfiles> |
393 <inputs> | 397 <inputs> |
394 <expand macro="reading_msidata"/> | 398 <expand macro="reading_msidata"/> |
395 <conditional name="pixels_cond"> | 399 <conditional name="pixels_cond"> |
396 <param name="pixel_filtering" type="select" label="Select pixel filtering option"> | 400 <param name="pixel_filtering" type="select" label="Select pixel filtering option"> |
397 <option value="none" selected="True">none</option> | 401 <option value="none" selected="True">none</option> |
398 <option value="two_columns">list of pixel coordinates (tabular file)</option> | 402 <option value="two_columns">coordinates from tabular file</option> |
399 <option value="pixel_range">ranges for x and y (manually)</option> | 403 <option value="pixel_range">ranges for x and y (manually)</option> |
400 </param> | 404 </param> |
401 <when value="none"/> | 405 <when value="none"/> |
402 <when value="two_columns"> | 406 <when value="two_columns"> |
403 <expand macro="reading_pixel_annotations"/> | 407 <param name="annotation_file" type="data" format="tabular" label="Tabular file with pixel coordinates" |
404 | 408 help="Tabular file with two columns: x values and y values"/> |
409 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/> | |
410 <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/> | |
411 <param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> | |
405 </when> | 412 </when> |
406 <when value="pixel_range"> | 413 <when value="pixel_range"> |
407 <param name="min_x_range" type="integer" value="0" label="Minimum value for x"/> | 414 <param name="min_x_range" type="integer" value="0" label="Minimum value for x"/> |
408 <param name="max_x_range" type="integer" value="100" label="Maximum value for x"/> | 415 <param name="max_x_range" type="integer" value="100" label="Maximum value for x"/> |
409 <param name="min_y_range" type="integer" value="0" label="Minimum value for y"/> | 416 <param name="min_y_range" type="integer" value="0" label="Minimum value for y"/> |
412 </conditional> | 419 </conditional> |
413 | 420 |
414 <conditional name="features_cond"> | 421 <conditional name="features_cond"> |
415 <param name="features_filtering" type="select" label="Select m/z feature filtering option"> | 422 <param name="features_filtering" type="select" label="Select m/z feature filtering option"> |
416 <option value="none" selected="True">none</option> | 423 <option value="none" selected="True">none</option> |
417 <option value="features_list">keep a list of m/z (tabular file)</option> | 424 <option value="features_list">keep m/z (tabular file)</option> |
418 <option value="features_range">m/z range (manually)</option> | 425 <option value="features_range">m/z range (manually)</option> |
419 <option value="remove_features">remove a list of m/z (tabular file)</option> | 426 <option value="remove_features">remove m/z (tabular file)</option> |
420 </param> | 427 </param> |
421 <when value="none"/> | 428 <when value="none"/> |
422 <when value="features_list"> | 429 <when value="features_list"> |
423 <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to keep"/> | 430 <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to keep"/> |
424 </when> | 431 </when> |
433 <option value="ppm" selected="True">ppm</option> | 440 <option value="ppm" selected="True">ppm</option> |
434 <option value="Da">Da</option> | 441 <option value="Da">Da</option> |
435 </param> | 442 </param> |
436 </when> | 443 </when> |
437 </conditional> | 444 </conditional> |
438 <param name="imzml_output" type="boolean" label="Output of imzML file" truevalue="TRUE" falsevalue="FALSE"/> | 445 <param name="imzml_output" type="select" display = "radio" optional = "False" |
439 | 446 label="Output format" help= "Choose the output format"> |
447 <option value="imzml_format" selected="True">imzML</option> | |
448 <option value="rdata_format">RData</option> | |
449 </param> | |
440 </inputs> | 450 </inputs> |
441 | 451 |
442 <outputs> | 452 <outputs> |
443 <data format="rdata" name="msidata_filtered" label="${tool.name} on ${on_string}"/> | 453 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"> |
454 <filter>imzml_output =='imzml_format'</filter> | |
455 </data> | |
456 <data format="rdata" name="outfile_rdata" label="${tool.name} on ${on_string}: RData"> | |
457 <filter>imzml_output == 'rdata_format'</filter> | |
458 </data> | |
444 <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "${tool.name} on ${on_string}: QC"/> | 459 <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "${tool.name} on ${on_string}: QC"/> |
445 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"> | |
446 <filter>imzml_output</filter> | |
447 </data> | |
448 </outputs> | 460 </outputs> |
449 <tests> | 461 <tests> |
450 <test> | 462 <test> |
451 <expand macro="infile_imzml"/> | 463 <expand macro="infile_imzml"/> |
452 <param name="pixel_filtering" value="pixel_range"/> | 464 <param name="pixel_filtering" value="pixel_range"/> |
453 <param name="min_x_range" value="1"/> | 465 <param name="min_x_range" value="1"/> |
454 <param name="max_x_range" value="20"/> | 466 <param name="max_x_range" value="20"/> |
455 <param name="min_y_range" value="2"/> | 467 <param name="min_y_range" value="2"/> |
456 <param name="max_y_range" value="2"/> | 468 <param name="max_y_range" value="2"/> |
457 <param name="features_filtering" value="features_range"/> | 469 <param name="features_filtering" value="features_range"/> |
458 <param name="min_mz" value="350" /> | 470 <param name="min_mz" value="350"/> |
459 <param name="max_mz" value="500"/> | 471 <param name="max_mz" value="500"/> |
472 <param name="imzml_output" value="imzml_format"/> | |
460 <output name="QC_overview" file="imzml_filtered3.pdf" compare="sim_size"/> | 473 <output name="QC_overview" file="imzml_filtered3.pdf" compare="sim_size"/> |
461 <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size"/> | 474 <output name="outfile_imzml" ftype="imzml" file="out3.imzml.txt" compare="sim_size"> |
475 <extra_files type="file" file="out3.imzml" name="imzml" lines_diff="4"/> | |
476 <extra_files type="file" file="out3.ibd" name="ibd" compare="sim_size"/> | |
477 </output> | |
462 </test> | 478 </test> |
463 <test> | 479 <test> |
464 <expand macro="infile_imzml"/> | 480 <expand macro="infile_imzml"/> |
465 <param name="pixel_filtering" value="two_columns"/> | 481 <param name="pixel_filtering" value="two_columns"/> |
466 <param name="annotation_file" ftype="tabular" value = "inputpixels_2column.tabular"/> | 482 <param name="annotation_file" ftype="tabular" value="inputpixels_2column.tabular"/> |
467 <param name="column_x" value="1"/> | 483 <param name="column_x" value="1"/> |
468 <param name="column_y" value="3"/> | 484 <param name="column_y" value="3"/> |
469 <param name="column_names" value="2"/> | 485 <param name="imzml_output" value="imzml_format"/> |
470 <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size"/> | 486 <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size"/> |
471 <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/> | 487 <output name="outfile_imzml" ftype="imzml" file="out4.imzml.txt" compare="sim_size"> |
472 <!--imzml output test not yet working: output name="outfile_imzml" file="filtering_imzmls/summary" compare="sim_size" delta="10000"> | 488 <extra_files type="file" file="out4.imzml" name="imzml" lines_diff="4"/> |
473 <extra_files type="file" name="imzml" value="filtering_imzmls/out4.imzML" compare="sim_size" delta="10000"/> | 489 <extra_files type="file" file="out4.ibd" name="ibd" compare="sim_size"/> |
474 <extra_files type="file" name="ibd" value="filtering_imzmls/out4.ibd" compare="sim_size" delta="10000"/> | 490 </output> |
475 </output--> | |
476 </test> | 491 </test> |
477 <test> | 492 <test> |
478 <expand macro="infile_imzml"/> | 493 <expand macro="infile_imzml"/> |
479 <param name="pixel_filtering" value="pixel_range"/> | 494 <param name="pixel_filtering" value="pixel_range"/> |
480 <param name="min_x_range" value="0"/> | 495 <param name="min_x_range" value="0"/> |
483 <param name="max_y_range" value="20"/> | 498 <param name="max_y_range" value="20"/> |
484 <param name="features_filtering" value="features_list"/> | 499 <param name="features_filtering" value="features_list"/> |
485 <param name="mz_tabular" ftype="tabular" value = "featuresofinterest5.tabular"/> | 500 <param name="mz_tabular" ftype="tabular" value = "featuresofinterest5.tabular"/> |
486 <param name="feature_column" value="1"/> | 501 <param name="feature_column" value="1"/> |
487 <param name="feature_header" value="0"/> | 502 <param name="feature_header" value="0"/> |
503 <param name="imzml_output" value="imzml_format"/> | |
488 <output name="QC_overview" file="imzml_filtered5.pdf" compare="sim_size"/> | 504 <output name="QC_overview" file="imzml_filtered5.pdf" compare="sim_size"/> |
489 <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" /> | 505 <output name="outfile_imzml" ftype="imzml" file="out5.imzml.txt" compare="sim_size"> |
506 <extra_files type="file" file="out5.imzml" name="imzml" lines_diff="4"/> | |
507 <extra_files type="file" file="out5.ibd" name="ibd" compare="sim_size"/> | |
508 </output> | |
490 </test> | 509 </test> |
491 <test> | 510 <test> |
492 <expand macro="infile_analyze75"/> | 511 <expand macro="infile_analyze75"/> |
512 <param name="imzml_output" value="imzml_format"/> | |
493 <output name="QC_overview" file="analyze75_filtered2.pdf" compare="sim_size"/> | 513 <output name="QC_overview" file="analyze75_filtered2.pdf" compare="sim_size"/> |
494 <output name="msidata_filtered" file="analyze_filteredoutside.RData" compare="sim_size" /> | 514 <output name="outfile_imzml" ftype="imzml" file="out6.imzml.txt" compare="sim_size"> |
515 <extra_files type="file" file="out6.imzml" name="imzml" lines_diff="4"/> | |
516 <extra_files type="file" file="out6.ibd" name="ibd" compare="sim_size"/> | |
517 </output> | |
495 </test> | 518 </test> |
496 <test> | 519 <test> |
497 <param name="infile" value="preprocessed.RData" ftype="rdata"/> | 520 <param name="infile" value="preprocessed.RData" ftype="rdata"/> |
498 <conditional name="outputs"> | 521 <conditional name="outputs"> |
499 <param name="outputs_select" value="no_quality_control"/> | 522 <param name="outputs_select" value="no_quality_control"/> |
500 </conditional> | 523 </conditional> |
501 <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size"/> | 524 <param name="imzml_output" value="imzml_format"/> |
502 <output name="QC_overview" file="rdata_notfiltered.pdf" compare="sim_size" /> | 525 <output name="QC_overview" file="rdata_notfiltered.pdf" compare="sim_size" /> |
526 <output name="outfile_imzml" ftype="imzml" file="out7.imzml.txt" compare="sim_size"> | |
527 <extra_files type="file" file="out7.imzml" name="imzml" lines_diff="4"/> | |
528 <extra_files type="file" file="out7.ibd" name="ibd" compare="sim_size"/> | |
529 </output> | |
503 </test> | 530 </test> |
504 </tests> | 531 </tests> |
505 <help> | 532 <help> |
506 <![CDATA[ | 533 <![CDATA[ |
507 | 534 |
511 | 538 |
512 This tool provides options to filter (subset) pixels and m/z features of mass spectrometry imaging data. | 539 This tool provides options to filter (subset) pixels and m/z features of mass spectrometry imaging data. |
513 | 540 |
514 @MSIDATA_INPUT_DESCRIPTION@ | 541 @MSIDATA_INPUT_DESCRIPTION@ |
515 | 542 |
516 @SPECTRA_TABULAR_INPUT_DESCRIPTION@ | 543 - Optional file with pixel coordinates and annotation: |
544 | |
545 - Tabular file: One column with x values, one column with y values | |
546 - The file is allowed to have any column names as header (in this case set "Tabular file contains a header line" to yes) | |
547 - Pixels with coordinates outside the coordinates of the input file are ignored | |
548 | |
549 :: | |
550 | |
551 x_coord y_coord | |
552 1 1 | |
553 2 1 | |
554 3 1 | |
555 ... | |
556 ... | |
517 | 557 |
518 @MZ_TABULAR_INPUT_DESCRIPTION@ | 558 @MZ_TABULAR_INPUT_DESCRIPTION@ |
519 | 559 |
520 **Options** | 560 **Options** |
521 | 561 |
522 - pixel filtering/annotation: either with a tabular file containing x and y coordinates and pixel annotations or by defining a range for x and y by hand (for the latter no annotation is possible). Pixel that are not present in the dataset are ignored. It is not possible to filter only for pixels that are not present in the dataset. | 562 - Pixel filtering/annotation: |
523 - m/z feature filtering: m/z values for filtering should be either imported as a tabular file containing containing m/z of interest or by defining a range for the m/z values. m/z that are not present in the dataset are ignored. It is not possible to filter only for m/z that are not present in the dataset. | 563 |
524 - m/z feature removing: perturbing m/z features such as matrix contaminants can be removed by specifying their m/z in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed. | 564 - Either with a tabular file containing x and y coordinates or by entering x-min, x-max, y-min, y-max manually |
565 - Pixels that are not present in the dataset are ignored | |
566 - An error occurs if the input for filtering (tabular file, x-range or y-range) does not contain a single coordinate that occurs in the input dataset | |
567 | |
568 | |
569 - m/z feature filtering: | |
570 | |
571 - Either with a tabular file containing m/z values or by entering m/z-min and m/z-max manually | |
572 - m/z values that are not present in the dataset are ignored | |
573 - An error occurs if the input for filtering (tabular file or mz-range) does not contain a single m/z feature that occurs in the dataset | |
574 | |
575 | |
576 - m/z feature removing: | |
577 | |
578 - Perturbing m/z features such as matrix contaminants can be removed by specifying their m/z value in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed | |
525 | 579 |
526 | 580 |
527 **Tips** | 581 **Tips** |
528 | 582 |
529 - Numeric m/z features imported via a tabular file and m/z features of the dataset are rounded to 4 decimal points (or maximum number of decimal points of input m/z) and then matched. Therefore, it is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If the m/z values are from a different dataset, the tool "Join two files on column allowing a small difference" should be used to find corresponding m/z values, which can then be used for filtering. | 583 - m/z feature filtering with a tabular file: |
530 - In case tabular file cannot be selected in drop-down menu: Datatype in Galaxy must be tabular otherwise file will not appear in selection window (if Galaxy auto-detection was wrong, datatype can be changed by pressing the pen button (edit attributes)) | 584 |
585 - For matching, the m/z features of the input dataset are rounded to the number of decimal points of the m/z values from the tabular file. In case the input had more than 4 digits, m/z values of the dataset and tabular file are rounded to 4 digits. | |
586 - Therefore, it is recommended to use the filtering tool only for m/z features which have been extracted from the same dataset. If the m/z values are from a different dataset, the tool 'Join two files on column allowing a small difference' should be used to find corresponding m/z values, which can then be used for filtering. | |
587 | |
588 - Problems selecting a tabular file: | |
589 | |
590 - In case tabular file cannot be selected in drop-down menu: Datatype in Galaxy might be different from 'tabular' - datatype can be changed by pressing the pen button of the dataset (edit attributes) | |
531 | 591 |
532 | 592 |
533 **Output** | 593 **Output** |
534 | 594 |
535 - MSI data as .RData output (can be read with the Cardinal package in R) | 595 - MSI data as imzML file or .RData (can be read with the Cardinal package in R) |
536 - optional: MSI data as imzML file | |
537 - pdf with heatmap showing the pixels that are left after filtering and histograms of kept and removed m/z | 596 - pdf with heatmap showing the pixels that are left after filtering and histograms of kept and removed m/z |
538 | 597 |
539 | 598 |
540 ]]> | 599 ]]> |
541 </help> | 600 </help> |