comparison msi_filtering.xml @ 0:f17d3f1a065f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_filtering commit 3363c40790b0d64a085f980980f4289165eed27f
author galaxyp
date Wed, 28 Feb 2018 14:02:21 -0500
parents
children 98c101b19f3c
comparison
equal deleted inserted replaced
-1:000000000000 0:f17d3f1a065f
1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.7.0">
2 <description>tool for filtering mass spectrometry imaging data</description>
3 <requirements>
4 <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
5 <requirement type="package" version="2.2.1">r-gridextra</requirement>
6 </requirements>
7 <command detect_errors="exit_code">
8 <![CDATA[
9 ## Stage the input under a fixed local name (composite parts are copied, RData is symlinked), then print and run the generated R script.
10 #if $infile.ext == 'imzml'
11 cp '${infile.extra_files_path}/imzml' infile.imzML &&
12 cp '${infile.extra_files_path}/ibd' infile.ibd &&
13 #elif $infile.ext == 'analyze75'
14 cp '${infile.extra_files_path}/hdr' infile.hdr &&
15 cp '${infile.extra_files_path}/img' infile.img &&
16 cp '${infile.extra_files_path}/t2m' infile.t2m &&
17 #else
18 ln -s '$infile' infile.RData &&
19 #end if
20 cat '${MSI_subsetting}' &&
21 echo '${MSI_subsetting}' &&
22 Rscript '${MSI_subsetting}'
23 ## Every interpolated path is single-quoted so that a path containing spaces or shell metacharacters is passed as one argument.
24 ]]>
25 </command>
26 <configfiles>
27 <configfile name="MSI_subsetting"><![CDATA[
28
29
30 ################################# load libraries and read file #########################
31 ## The command section has already staged the input in the working directory as infile.imzML, infile.hdr or infile.RData.
32
33 library(Cardinal)
34 library(gridExtra)
35
36 ## Read MALDI Imaging dataset
37 ## imzML and Analyze7.5 are parsed with readMSIData; for RData, load() restores the stored objects and the rest of the script expects one of them to be named msidata.
38 #if $infile.ext == 'imzml'
39 msidata = readMSIData('infile.imzML')
40 #elif $infile.ext == 'analyze75'
41 msidata = readMSIData('infile.hdr')
42 #else
43 load('infile.RData')
44 #end if
45
46 ###################################### inputfile properties in numbers ######################
47 ## Snapshot of the unfiltered data, computed only when the QC report is requested; these values fill the "before" column of the QC table.
48 #if $outputs.outputs_select == "quality_control"
49 ## Number of features (mz)
50 maxfeatures = length(features(msidata))
51 ## Range mz
52 minmz = round(min(mz(msidata)), digits=2)
53 maxmz = round(max(mz(msidata)), digits=2)
54 ## Number of spectra (pixels)
55 pixelcount = length(pixels(msidata))
56 ## Range x coordinates
57 minimumx = min(coord(msidata)[,1])
58 maximumx = max(coord(msidata)[,1])
59 ## Range y coordinates
60 minimumy = min(coord(msidata)[,2])
61 maximumy = max(coord(msidata)[,2])
62 ## Number of intensities > 0
63 npeaks= sum(spectra(msidata)[]>0)
64 ## Spectra multiplied with mz (potential number of peaks)
65 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
66 ## Percentage of intensities > 0
67 percpeaks = round(npeaks/numpeaks*100, digits=2)
68 ## Number of empty TICs
69 TICs = colSums(spectra(msidata)[])
70 NumemptyTIC = sum(TICs == 0)
71 ## median TIC
72 medint = round(median(TICs), digits=2)
73 ## Store features for QC plot
74 featuresinfile = mz(msidata)
75 #end if
76
77
78 ###################################### filtering of pixels ######################
79 #if $inputpixels:
80 input_list = read.delim("$inputpixels", header = FALSE,
81 na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE)
82 validpixels = input_list[,$pixel_column] %in% names(pixels(msidata))
83 ## validpixels is a logical vector with one entry per input row; any() reduces it to the single TRUE/FALSE that the if statement needs.
84 if (any(validpixels))
85 {
86 pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[validpixels,$pixel_column]]
87 msidata = msidata[,pixelsofinterest]
88 numberpixels = length(input_list[,$pixel_column])
89 }else {
90 numberpixels = 0
91 }
92 ## Without at least one valid pixel name the data are left unfiltered.
93 ## The branch below sets placeholders when no pixel file was given, so the QC table can still read validpixels and numberpixels.
94 #else
95 input_list = data.frame(0, 0)
96 validpixels=0
97 numberpixels = 0
98 #end if
99
100
101
102 ###################################### filtering of features ######################
103
104 #if $inputfeatures:
105 input_features = read.delim("$inputfeatures", header = FALSE,
106 na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE)
107 validfeatures = input_features[,$feature_column] %in% names(features(msidata))
108 ## validfeatures is a logical vector with one entry per input row; any() reduces it to the single TRUE/FALSE that the if statement needs.
109 if (any(validfeatures))
110 {
111 featuresofinterest = features(msidata)[names(features(msidata)) %in% input_features[validfeatures,$feature_column]]
112 msidata = msidata[featuresofinterest,]
113 numberfeatures = length(input_features[,$feature_column])
114 } else {
115 numberfeatures = 0
116 }
117 ## Without at least one valid feature name the data are left unfiltered.
118 ## The branch below sets placeholders when no feature file was given, so the QC table can still read validfeatures and numberfeatures.
119 #else
120 input_features = data.frame(0, 0)
121 validfeatures = 0
122 numberfeatures = 0
123 #end if
124
125
126
127
128
129 ## This step is unconditional: the object is written even when no filter file was given or no entry matched.
130 # save msidata as Rfile
131 save(msidata, file="$msidata_filtered")
132
133 ###################################### outputfile properties in numbers ######################
134 ## Same measurements as the input-side section, taken after subsetting; they fill the "filtered" column of the QC table.
135 #if $outputs.outputs_select == "quality_control"
136
137 ## Number of features (mz)
138 maxfeatures2 = length(features(msidata))
139 ## Range mz
140 minmz2 = round(min(mz(msidata)), digits=2)
141 maxmz2 = round(max(mz(msidata)), digits=2)
142 ## Number of spectra (pixels)
143 pixelcount2 = length(pixels(msidata))
144 ## Range x coordinates
145 minimumx2 = min(coord(msidata)[,1])
146 maximumx2 = max(coord(msidata)[,1])
147 ## Range y coordinates
148 minimumy2 = min(coord(msidata)[,2])
149 maximumy2 = max(coord(msidata)[,2])
150 ## Number of intensities > 0
151 npeaks2= sum(spectra(msidata)[]>0)
152 ## Spectra multiplied with mz (potential number of peaks)
153 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
154 ## Percentage of intensities > 0
155 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2)
156 ## Number of empty TICs
157 TICs2 = colSums(spectra(msidata)[])
158 NumemptyTIC2 = sum(TICs2 == 0)
159 ## median TIC
160 medint2 = round(median(TICs2), digits=2)
161
162 ## Three parallel character vectors (row label, before, filtered) that become the columns of the QC table; keep their order in sync.
163 properties = c("Number of mz features",
164 "Range of mz values [Da]",
165 "Number of pixels",
166 "Range of x coordinates",
167 "Range of y coordinates",
168 "Intensities > 0",
169 "Median TIC per pixel",
170 "Number of zero TICs",
171 paste0("# pixels in ", "$inputpixels.display_name"),
172 paste0("# mz in ", "$inputfeatures.display_name"))
173
174 before = c(paste0(maxfeatures),
175 paste0(minmz, " - ", maxmz),
176 paste0(pixelcount),
177 paste0(minimumx, " - ", maximumx),
178 paste0(minimumy, " - ", maximumy),
179 paste0(percpeaks, " %"),
180 paste0(medint),
181 paste0(NumemptyTIC),
182 paste0("input pixels: ", numberpixels),
183 paste0("input mz: ", numberfeatures))
184
185 filtered = c(paste0(maxfeatures2),
186 paste0(minmz2, " - ", maxmz2),
187 paste0(pixelcount2),
188 paste0(minimumx2, " - ", maximumx2),
189 paste0(minimumy2, " - ", maximumy2),
190 paste0(percpeaks2, " %"),
191 paste0(medint2),
192 paste0(NumemptyTIC2),
193 paste0("valid pixels: ", sum(validpixels)),
194 paste0("valid mz: ", sum(validfeatures)))
195
196
197 property_df = data.frame(properties, before, filtered)
198
199
200
201 ######################################## PDF QC #############################################
202 ## First page: an empty plot to which the title and the property table are added.
203 ## The file name must stay filtertool_QC.pdf because the filtering_qc output collects it via from_work_dir.
204 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12)
205 plot(0,type='n',axes=FALSE,ann=FALSE)
206
207 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name"))
208
209
210 grid.table(property_df, rows= NULL)
211
212
213 ### heatmap image as visual pixel control
214 ## ylim is passed as c(max, min), each padded by 20 percent of its own value; presumably this draws the y axis reversed (confirm against the Cardinal image documentation).
215
216 image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none",
217 main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2))
218
219 ### control features which are left
220 ## Two stacked panels: m/z values before filtering (top) and after filtering (bottom).
221 par(mfrow = c(2,1))
222 plot(featuresinfile, ylab = "m/z in Dalton", xlab = "feature index")
223 plot(mz(msidata), ylab = "m/z in Dalton", xlab = "feature index")
224
225
226 dev.off()
227
228 #end if
229
230 ######################################## intensity matrix ##################################
231 ## Optional tabular export: one row per m/z feature, one column per pixel.
232 #if $output_matrix:
233 ## Both operands are single values, so the short-circuit operator is the right one for this condition.
234 if (length(features(msidata))> 0 && length(pixels(msidata)) > 0)
235 {
236 ## The pixel vector is bound on as a first row and dropped again when writing; presumably this is done to take over the pixel names as column names.
237 spectramatrix = spectra(msidata)
238 rownames(spectramatrix) = mz(msidata)
239 newmatrix = rbind(pixels(msidata), spectramatrix)
240 write.table(newmatrix[2:nrow(newmatrix), , drop = FALSE], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
241 ## drop = FALSE keeps a single remaining feature as a one-row matrix instead of collapsing it to a vector.
242 }else{
243 print("file has no features or pixels left")
244 }
245
246 #end if
247
248
249 ]]></configfile>
250 </configfiles>
251 <inputs>
252 <param name="infile" type="data" format="imzml, rdata, analyze75"
253 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
254 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
255 <param name="inputpixels" type="data" optional="true" format="tabular" label="pixels for filtering of MSI data"
256 help="tabular file with pixels of interest in the form x = 1, y = 1"/>
257 <param name="pixel_column" data_ref="inputpixels" optional="true" label="Column with pixels" type="data_column" />
258 <param name="inputfeatures" type="data" optional="true" format="tabular" label="features for filtering of MSI data"
259 help="tabular file with masses of interest in the form mz = 800.05"/>
260 <param name="feature_column" data_ref="inputfeatures" optional="true" label="Column with features" type="data_column" />
261 <!-- Both filter files are optional; the R script skips a filtering step when its file is omitted. The conditional below exposes the heatmap m/z and mass window only when the QC report is enabled. -->
262 <conditional name="outputs">
263 <param name="outputs_select" type="select" label="Quality control output">
264 <option value="quality_control" selected="True">yes</option>
265 <option value="no_quality_control" >no</option>
266 </param>
267 <when value="quality_control">
268 <param name="inputmz" type="float" value="1296.7" label="Mass for which a heatmap image will be drawn" help="Use a mass which is still present in all pixels to control if the pixel filtering went well"/>
269 <param name="plusminus_dalton" value="0.25" type="float" label="mass range for mz value" help="plusminus mass window in Dalton"/>
270 </when>
271 </conditional>
272 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/>
273 </inputs>
274 <outputs> <!-- msidata_filtered is always produced; the QC PDF and the matrix are only created when their filter expression is true -->
275 <data format="rdata" name="msidata_filtered" label="${tool.name} on $infile.display_name"/>
276 <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "QC ${tool.name} on $infile.display_name">
277 <filter>outputs["outputs_select"] == "quality_control"</filter>
278 </data>
279 <data format="tabular" name="matrixasoutput" label="matrix ${tool.name} on $infile.display_name">
280 <filter>output_matrix</filter>
281 </data>
282 </outputs>
283
284 <tests>
285 <test expect_num_outputs="2"> <!-- imzML input, pixel and feature filtering, QC on: RData + QC PDF -->
286 <param name="infile" value="" ftype="imzml">
287 <composite_data value="Example_Continuous.imzML"/>
288 <composite_data value="Example_Continuous.ibd"/>
289 </param>
290 <param name="inputpixels" ftype="tabular" value = "inputpixels.tabular"/>
291 <param name="pixel_column" value="1"/>
292 <param name="inputfeatures" ftype="tabular" value = "inputfeatures.tabular"/>
293 <param name="feature_column" value="2"/>
294
295 <conditional name="outputs">
296 <param name="outputs_select" value="quality_control"/>
297 <param name="inputmz" value="328.9"/>
298 <param name="plusminus_dalton" value="0.25"/>
299 </conditional>
300 <output name="filtering_qc" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/>
301 <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size" />
302 </test>
303 <test expect_num_outputs="3"> <!-- Analyze7.5 input, pixel and feature filtering, QC on, matrix on: RData + QC PDF + matrix -->
304 <param name="infile" value="" ftype="analyze75">
305 <composite_data value="Analyze75.hdr"/>
306 <composite_data value="Analyze75.img"/>
307 <composite_data value="Analyze75.t2m"/>
308 </param>
309 <param name="inputpixels" ftype="tabular" value = "inputpixels2.tabular"/>
310 <param name="pixel_column" value="1"/>
311 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest2.tabular"/>
312 <param name="feature_column" value="1"/>
313 <conditional name="outputs">
314 <param name="outputs_select" value="quality_control"/>
315 <param name="inputmz" value="702"/>
316 <param name="plusminus_dalton" value="0.25"/>
317 </conditional>
318 <param name="output_matrix" value="True"/>
319 <output name="filtering_qc" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/>
320 <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" />
321 <output name="matrixasoutput" file="analyze_matrix.tabular"/>
322 </test>
323 <test expect_num_outputs="1"> <!-- Analyze7.5 input, no filter files, QC off: RData only -->
324 <param name="infile" value="" ftype="analyze75">
325 <composite_data value="Analyze75.hdr"/>
326 <composite_data value="Analyze75.img"/>
327 <composite_data value="Analyze75.t2m"/>
328 </param>
329 <conditional name="outputs">
330 <param name="outputs_select" value="no_quality_control"/>
331 </conditional>
332 <output name="msidata_filtered" file="analyze_originaloutput.RData" compare="sim_size" />
333 </test>
334 <test expect_num_outputs="2"> <!-- RData input, no filter files, QC off, matrix on: RData + matrix -->
335 <param name="infile" value="preprocessing_results1.RData" ftype="rdata"/>
336 <conditional name="outputs">
337 <param name="outputs_select" value="no_quality_control"/>
338 </conditional>
339 <param name="output_matrix" value="True"/>
340 <output name="matrixasoutput" file="rdata_matrix.tabular"/>
341 </test>
342 </tests>
343 <help>
344 <![CDATA[
345
346 This tool can filter three types of mass-spectrometry imaging files (see below) for pixels and features of interest. This can be used to keep only the pixels in a region of interest.
347 At least one valid pixel or feature is needed for filtering; otherwise no filtering is performed.
348
349 Input data: 3 types of input data can be used:
350
351 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_
352 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
353 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
354
355 The output of this tool is a subsetted Cardinal "MSImageSet" with the variable name "msidata" saved as .RData.
356 ]]>
357 </help>
358 <citations>
359 <citation type="doi">10.1093/bioinformatics/btv146</citation>
360 </citations>
361 </tool>