0
|
1 <tool id="abims_xcms_xcmsSet" name="xcms.xcmsSet" version="2.0.2">
|
|
2
|
|
3 <description>Filtration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis </description>
|
|
4
|
|
<!-- Dependencies declared for this tool: R 3.1.2 (package), the Rscript binary, xcms 1.44.0 (package) and xcms_w4m_script 2.1 (package). -->
5 <requirements>
|
|
6 <requirement type="package" version="3.1.2">R</requirement>
|
|
7 <requirement type="binary">Rscript</requirement>
|
|
8 <requirement type="package" version="1.44.0">xcms</requirement>
|
|
9 <requirement type="package" version="2.1">xcms_w4m_script</requirement>
|
|
10 </requirements>
|
|
11
|
|
<!-- Error detection: any exit code of 1 or above is reported at the fatal level. No other stdio rule is declared in this section. -->
12 <stdio>
|
|
13 <exit_code range="1:" level="fatal" />
|
|
14 </stdio>
|
|
15
|
|
<!--
Command template (Cheetah) for xcms.r with xfunction xcmsSet.
Input: either the library directory (user_library_import_dir / user email / library name) or the zip dataset, depending on inputs.input.
nSlaves is taken from GALAXY_SLOTS and falls back to 1. Only the parameters of the selected method are passed; text values for
peakwidth, scanrange, prefilter and scales are wrapped in c(...).
The library path is single-quoted so that a directory name containing spaces or parentheses reaches xcms.r as one argument.
After a successful run the files expected from xcms.r are moved onto the declared outputs and the log is printed.
The final cat sits inside the subshell on purpose: placed after it with a semicolon, cat would always run and its exit status
would replace the one of a failed xcms.r, hiding the failure from the exit code rule declared in stdio.
-->
16 <command>
|
|
17 xcms.r
|
|
18 #if $inputs.input == "lib":
|
|
19 library '$__app__.config.user_library_import_dir/$__user_email__/$inputs.library'
|
|
20 #elif $inputs.input == "zip_file":
|
|
21 zipfile $inputs.zip_file
|
|
22 #end if
|
|
23
|
|
24 xfunction xcmsSet
|
|
25 ## profmethod $profmethod
|
|
26 nSlaves \${GALAXY_SLOTS:-1} method $methods.method
|
|
27 #if $methods.method == "centWave":
|
|
28 ppm $methods.ppm
|
|
29 peakwidth "c($methods.peakwidth)"
|
|
30 #if $methods.options_scanrange.option == "show":
|
|
31 scanrange "c($methods.options_scanrange.scanrange)"
|
|
32 #end if
|
|
33 #if $methods.options_c.option == "show":
|
|
34 mzdiff $methods.options_c.mzdiff
|
|
35 snthresh $methods.options_c.snthresh
|
|
36 integrate $methods.options_c.integrate
|
|
37 noise $methods.options_c.noise
|
|
38 prefilter "c($methods.options_c.prefilter)"
|
|
39 #end if
|
|
40 #elif $methods.method == "matchedFilter":
|
|
41 step $methods.step
|
|
42 fwhm $methods.fwhm
|
|
43 #if $methods.options_m.option == "show":
|
|
44 ## sigma "$methods.options_m.sigma"
|
|
45 max $methods.options_m.max
|
|
46 snthresh $methods.options_m.snthresh
|
|
47 ## mzdiff $methods.options_m.mzdiff
|
|
48 steps $methods.options_m.steps
|
|
49 ## sleep $methods.options_m.sleep
|
|
50 #end if
|
|
51 #elif $methods.method == "MSW":
|
|
52 snthr $methods.snthr
|
|
53 nearbyPeak $methods.nearbyPeak
|
|
54 winSize.noise $methods.winSize_noise
|
|
55 amp.Th $methods.amp_Th
|
|
56 scales "c($methods.scales)"
|
|
57 SNR.method "$methods.SNR_method"
|
|
58 #end if
|
|
59 && (mv xcmsSet.RData $xsetRData;
|
|
60 mv sampleMetadata.tsv $sampleMetadata;
|
|
61 mv TICs_raw.pdf $ticsRawPdf;
|
|
62 mv BPCs_raw.pdf $bpcsRawPdf;
|
|
63 mv xset.log $log;
|
|
64 cat $log)
|
|
65 </command>
|
|
66
|
|
67 <inputs>
|
|
68
|
|
<!-- Input source selector. "zip_file" (preselected) takes a dataset of format no_unzip.zip from the history; "lib" takes the name of a library directory, which must not be left empty (empty_field validator). The command section branches on these same two option values. -->
69 <conditional name="inputs">
|
|
70 <param name="input" type="select" label="Choose your inputs method" >
|
|
71 <option value="zip_file" selected="true">Zip file from your history containing your chromatograms</option>
|
|
72 <option value="lib" >Library directory name</option>
|
|
73 </param>
|
|
74 <when value="zip_file">
|
|
75 <param name="zip_file" type="data" format="no_unzip.zip" label="Zip file" />
|
|
76 </when>
|
|
77 <when value="lib">
|
|
78 <param name="library" type="text" size="40" label="Library directory name" help="The name of your directory containing all your data" >
|
|
79 <validator type="empty_field"/>
|
|
80 </param>
|
|
81 </when>
|
|
82
|
|
83 </conditional>
|
|
84
|
|
85
|
|
86 <!--
|
|
87 <param name="profmethod" type="select" label="Method to use for profile generation (profmethod)" >
|
|
88 <option value="bin" selected="true">bin</option>
|
|
89 <option value="binlin">binlin</option>
|
|
90 <option value="binlinbase">binlinbase</option>
|
|
91 <option value="intlin">intlin</option>
|
|
92 </param>
|
|
93 <param name="nSlaves" type="integer" value="9" label="MPI-slaves CPU" help="number of MPI-slaves to use for parallel peak detection" />
|
|
94 -->
|
|
95 <conditional name="methods">
|
|
<!-- Peak detection method selector (centWave, matchedFilter or MSW); matchedFilter is preselected. The value is passed to xcms.r as "method" and decides which parameter group is shown and forwarded. -->
96 <param name="method" type="select" label="Extraction method for peaks detection" help="[method] See the help section below">
|
|
97 <option value="centWave" >centWave</option>
|
|
98 <option value="matchedFilter" selected="true">matchedFilter</option>
|
|
99 <option value="MSW">MSW</option>
|
|
100 </param>
|
|
101
|
|
102 <!-- centWave Filter options -->
|
|
<!--
Parameters shown when centWave is selected.
peakwidth, scanrange and prefilter are free-text fields that the command section wraps in c(...), so they must be entered
as bare comma-separated values (no surrounding parentheses); the scanrange help example reflects this.
scanrange and the advanced options are only forwarded when their selector is set to "show" (both default to "hide").
-->
103 <when value="centWave">
|
|
104 <param name="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="[ppm]" />
|
|
105 <param name="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="[peakwidth]" />
|
|
106 <conditional name="options_scanrange">
|
|
107 <param name="option" type="select" label="Scan range option " >
|
|
108 <option value="show">show</option>
|
|
109 <option value="hide" selected="true">hide</option>
|
|
110 </param>
|
|
111 <when value="show">
|
|
112 <param name="scanrange" type="text" value="" label="scanrange" help="scan range to process, given as min,max without parentheses, for example 16,365" >
|
|
113 <validator type="empty_field"/>
|
|
114 </param>
|
|
115 </when>
|
|
116 </conditional>
|
|
117
|
|
118 <conditional name="options_c">
|
|
119 <param name="option" type="select" label="Advanced options" >
|
|
120 <option value="show">show</option>
|
|
121 <option value="hide" selected="true">hide</option>
|
|
122 </param>
|
|
123 <when value="show">
|
|
124 <param name="snthresh" type="integer" value="10" label="Signal/Noise threshold" help="[snthresh] Signal to noise ratio cutoff" />
|
|
125 <param name="mzdiff" type="float" value="-0.001" label="Min m/z difference" help="[mzdiff] Min m/z difference for peaks with overlapping RT " />
|
|
126 <param name="integrate" type="select" label="peak limits method" help="[integrate]" >
|
|
127 <option value="1">peak limits based on smoothed 2nd derivative (less precise)</option>
|
|
128 <option value="2">peak limits based on real data (more sensitive to noise)</option>
|
|
129 </param>
|
|
130 <param name="prefilter" type="text" value="3,100" label="Prefilter step for the first phase" help="[prefilter] Separate by comma k,I. Mass traces are only retained if they contain at least ‘k’ peaks with intensity >= ‘I’"/>
|
|
131 <param name="noise" type="integer" value="0" label="Noise filter" help="[noise] optional argument which is useful for data that was centroided without any intensity threshold, centroids with intensity smaller than ‘noise’ are omitted from ROI detection"/>
|
|
132 </when>
|
|
133 </conditional>
|
|
134 </when>
|
|
135
|
|
136 <!-- matched Filter options -->
|
|
<!-- Parameters shown when matchedFilter is selected: step and fwhm are always forwarded; max, snthresh and steps only when the advanced selector is set to "show" (default "hide"). The sigma and mzdiff parameters are commented out here, and their lines are commented out in the command section as well. -->
137 <when value="matchedFilter">
|
|
138 <param name="step" type="float" value="0.01" label="Step size to use for profile generation" help="[step] The peak detection algorithm creates extracted ion base peak chromatograms (EIBPC) on a fixed step size" />
|
|
139 <param name="fwhm" type="integer" value="30" label="Full width at half maximum of matched filtration gaussian model peak" help="[fwhm] Only used to calculate the actual sigma" />
|
|
140 <conditional name="options_m">
|
|
141 <param name="option" type="select" label="Advanced options" >
|
|
142 <option value="show">show</option>
|
|
143 <option value="hide" selected="true">hide</option>
|
|
144 </param>
|
|
145 <when value="show">
|
|
146 <!--
|
|
147 <param name="sigma" type="hidden" value="fwhm/2.3548" label="sigma" help="standard deviation (fwhm/2.3548)" />
|
|
148 -->
|
|
149 <param name="max" type="integer" value="5" label="Maximum number of peaks per extracted ion chromatogram" help="[max]" />
|
|
150 <param name="snthresh" type="integer" value="10" label="Signal to noise ratio cutoff" help="[snthresh]" />
|
|
151 <param name="steps" type="integer" value="2" label="Number of steps to merge prior to filtration" help="[steps] The peak identification algorithm combines a given number of EIBPCs prior to filtration and peak detection, as defined by the steps argument" />
|
|
152 <!--
|
|
153 <param name="mzdiff" type="text" size="20" value="0.8-step*steps" label="m/z difference" help="min m/z difference for peaks with overlapping RT " />
|
|
154 -->
|
|
155 </when>
|
|
156 </conditional>
|
|
157 </when>
|
|
158
|
|
159 <!-- MSW Filter options -->
|
|
<!--
Parameters shown when MSW is selected. All of them are always forwarded by the command section.
Names with an underscore (winSize_noise, amp_Th, SNR_method) are passed to xcms.r with a dot (winSize.noise, amp.Th, SNR.method).
scales is a free-text R expression that the command section wraps in c(...); the help shows the two accepted
spellings without quotes, matching the default value seq(1,22,3).
-->
160 <when value="MSW">
|
|
161 <param name="nearbyPeak" type="select" label="Determine whether to include the nearby small peaks of major peaks" help="[nearbyPeak]" >
|
|
162 <option value="TRUE">TRUE</option>
|
|
163 <option value="FALSE">FALSE</option>
|
|
164 </param>
|
|
165 <param name="winSize_noise" type="integer" value="500" label="The local window size to estimate the noise level" help="[winSize.noise]" />
|
|
166 <param name="snthr" type="integer" value="3" label="SNR (Signal to Noise Ratio) threshold" help="[snthr]" />
|
|
167 <param name="amp_Th" type="float" value="0.002" label="Minimum required relative amplitude of the peak" help="[amp.Th] Ratio to the maximum of CWT coefficients" />
|
|
168 <param name="scales" type="text" value="seq(1,22,3)" label="Scales for the Continuous Wavelet Transform (CWT)" help="[scales] Scales are linked to the width of the peaks that are to be detected. Type either seq(from,to,by) for a regular sequence, for example seq(1,22,3), or c(n,n,n) for an explicit vector" />
|
|
169 <param name="SNR_method" type="text" value="data.mean" label="SNR (Signal to Noise Ratio) method" help="[SNR.method] Method to estimate noise level. Currently, only 95 percentage quantile is supported." />
|
|
170 </when>
|
|
171 </conditional>
|
|
172 </inputs>
|
|
173
|
|
<!-- Output datasets. The command section moves xcmsSet.RData, sampleMetadata.tsv, TICs_raw.pdf, BPCs_raw.pdf and xset.log onto xsetRData, sampleMetadata, ticsRawPdf, bpcsRawPdf and log respectively. -->
174 <outputs>
|
|
175 <data name="xsetRData" format="rdata.xcms.raw" label="xset.RData" />
|
|
176 <data name="sampleMetadata" format="tabular" label="sampleMetadata.tsv" />
|
|
177 <data name="ticsRawPdf" format="pdf" label="xset.TICs_raw.pdf" />
|
|
178 <data name="bpcsRawPdf" format="pdf" label="xset.BPCs_raw.pdf" />
|
|
179 <data name="log" format="txt" label="xset.log.txt" />
|
|
180 </outputs>
|
|
181
|
|
<!-- Single functional test: matchedFilter on sacuri.zip (step 0.01, fwhm 4) with the advanced options shown (max 50, snthresh 1, steps 2). Each of the five outputs is compared with a reference file. These are the same settings as the "Working example" in the help section. -->
182 <tests>
|
|
183 <test>
|
|
184 <param name="inputs.input" value="zip_file" />
|
|
185 <param name="inputs.zip_file" value="sacuri.zip" />
|
|
186 <param name="methods.method" value="matchedFilter" />
|
|
187 <param name="methods.step" value="0.01" />
|
|
188 <param name="methods.fwhm" value="4" />
|
|
189 <param name="methods.options_m.option" value="show" />
|
|
190 <param name="methods.options_m.max" value="50" />
|
|
191 <param name="methods.options_m.snthresh" value="1" />
|
|
192 <param name="methods.options_m.steps" value="2" />
|
|
193 <output name="xsetRData" file="xset.RData" />
|
|
194 <output name="sampleMetadata" file="sampleMetadata.tsv" />
|
|
195 <output name="ticsRawPdf" file="xset.TICs_raw.pdf" />
|
|
196 <output name="bpcsRawPdf" file="xset.BPCs_raw.pdf" />
|
|
197 <output name="log" file="xset.log.txt" />
|
|
198 </test>
|
|
199 </tests>
|
|
200
|
|
201 <help>
|
|
202
|
|
203 .. class:: infomark
|
|
204
|
|
205 **Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu
|
|
206
|
|
207 .. class:: infomark
|
|
208
|
|
209 **Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@univ-nantes.fr - part of Workflow4Metabolomics.org [W4M]
|
|
210
|
|
211 | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.
|
|
212
|
|
213 ---------------------------------------------------
|
|
214
|
|
215 ============
|
|
216 Xcms.xcmsSet
|
|
217 ============
|
|
218
|
|
219 -----------
|
|
220 Description
|
|
221 -----------
|
|
222
|
|
223 This tool is used for preprocessing analyte data from multiple LC/MS files (formats NetCDF, mzXML and mzData). It extracts ions from each sample independently and, using a statistical model, filters and integrates the peaks.
|
|
224 You can read a tutorial on how to perform xcms preprocessing which is available here_.
|
|
225
|
|
226 .. _here: http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToPerformXcmsPreprocessing_v02.pdf
|
|
227
|
|
228
|
|
229 -----------------
|
|
230 Workflow position
|
|
231 -----------------
|
|
232
|
|
233 **Upstream tools**
|
|
234
|
|
235 ========================= ================= ======= =========
|
|
236 Name output file format parameter
|
|
237 ========================= ================= ======= =========
|
|
238 NA NA zip NA
|
|
239 ========================= ================= ======= =========
|
|
240
|
|
241
|
|
242 **Downstream tools**
|
|
243
|
|
244 +---------------------------+--------------------+-----------------+
|
|
245 | Name | Output file | Format |
|
|
246 +===========================+====================+=================+
|
|
247 |xcms.group | xset.RData | rdata.xcms.raw |
|
|
248 +---------------------------+--------------------+-----------------+
|
|
249 |PCA ellipsoid by factors | sampleMetadata.tsv | Tabular |
|
|
250 +---------------------------+--------------------+-----------------+
|
|
251 |Anova | sampleMetadata.tsv | Tabular |
|
|
252 +---------------------------+--------------------+-----------------+
|
|
253
|
|
254
|
|
255 **Example of a metabolomic workflow**
|
|
256
|
|
257 .. image:: XCMS_Galaxy_workflow.png
|
|
258
|
|
259
|
|
260 ------
|
|
261
|
|
262 .. class:: infomark
|
|
263
|
|
264 The output file is an xset.RData file. You can continue your analysis using it in the **xcms.group** tool.
|
|
265
|
|
266 ---------------------------------------------------
|
|
267
|
|
268
|
|
269
|
|
270 -----------
|
|
271 Input files
|
|
272 -----------
|
|
273
|
|
274 +---------------------------+------------+
|
|
275 | Parameter : num + label | Format |
|
|
276 +===========================+============+
|
|
277 | 1 : Choose your inputs | zip |
|
|
278 +---------------------------+------------+
|
|
279
|
|
280 **Choose your inputs**
|
|
281
|
|
282 You have two methods for your inputs:
|
|
283
|
|
284 | Zip file (recommended): You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories).
|
|
285 | library folder: You must specify the name of your "library" (folder) created within your space project (for example: /projet/externe/institut/login/galaxylibrary/yourlibrary). Your library must contain all your conditions as sub-directories.
|
|
286
|
|
287 ----------
|
|
288 Parameters
|
|
289 ----------
|
|
290
|
|
291 Extraction method for peaks detection
|
|
292 -------------------------------------
|
|
293
|
|
294 **Matched Filter**
|
|
295
|
|
296 | One parameter to consider is the Gaussian model peak width used for matched filtration, an integral part of the peak detection algorithm.
|
|
297 | For a discussion of how model peak width affects the signal to noise ratio, see Danielsson et al. (2002).
|
|
298
|
|
299
|
|
300 **centWave**
|
|
301
|
|
302 | This algorithm is most suitable for high resolution LC/{TOF,OrbiTrap,FTICR}-MS data in centroid mode.
|
|
303 | Due to the fact that peak centroids are used, a binning step is not necessary.
|
|
304 | The method is capable of detecting close-by-peaks and also overlapping peaks. Some efforts are made to detect the exact peak boundaries to get precise peak integrals.
|
|
305
|
|
306 **MSW**
|
|
307
|
|
308 | Wavelet based, used for direct infusion data. Continuous wavelet transform (CWT) can be used to locate chromatographic peaks on different scales.
|
|
309 | If you wish to have more details about the other parameters, you can read the following documents:
|
|
310 | -Example of preprocessing data with XCMS : http://www.bioconductor.org/packages/2.12/bioc/vignettes/xcms/inst/doc/xcmsPreprocess.pdf
|
|
311 | -Details and explanations for all the parameters of XCMS package: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf
|
|
312
|
|
313
|
|
314 ------------
|
|
315 Output files
|
|
316 ------------
|
|
317
|
|
318 xset.TICs_raw.pdf
|
|
319
|
|
320 | "Total Ion Chromatograms" graph in pdf format.
|
|
321
|
|
322 xset.BPCs_raw.pdf
|
|
323
|
|
324 | "Base Peak Chromatograms" graph in pdf format with each class samples opposed.
|
|
325
|
|
326 sampleMetadata.tsv
|
|
327
|
|
328 | Tabular file that contains, for each sample, its associated class and polarity (positive, negative or mixed).
|
|
329 | This file is necessary in the Anova and PCA steps of the workflow.
|
|
330
|
|
331 xset.RData: rdata.xcms.raw format
|
|
332
|
|
333 | RData file that is necessary in the second step of the workflow, "xcms.group".
|
|
334
|
|
335 ------
|
|
336
|
|
337 .. class:: infomark
|
|
338
|
|
339 The output file is an xset.RData file. You can continue your analysis using it in the **xcms.group** tool.
|
|
340
|
|
341 ---------------------------------------------------
|
|
342
|
|
343 ---------------
|
|
344 Working example
|
|
345 ---------------
|
|
346
|
|
347 Input files
|
|
348 -----------
|
|
349
|
|
350 | zip_file -> **sacuri.zip**
|
|
351
|
|
352 Parameters
|
|
353 ----------
|
|
354
|
|
355 | Method -> **matchedFilter**
|
|
356 | step -> **0.01**
|
|
357 | fwhm -> **4**
|
|
358 | Advanced options -> **show**
|
|
359 | max -> **50**
|
|
360 | snthresh -> **1**
|
|
361 | steps -> **2**
|
|
362
|
|
363
|
|
364 Output files
|
|
365 ------------
|
|
366
|
|
367 | **1) xset.RData: RData file**
|
|
368
|
|
369 | **2) Example of a sampleMetadata.tsv :**
|
|
370
|
|
371
|
|
372 +---------------------------+------------+---------+
|
|
373 | sampleMetadata | class | polarity|
|
|
374 +===========================+============+=========+
|
|
375 |HU_neg_017 | bio |negative |
|
|
376 +---------------------------+------------+---------+
|
|
377 |HU_neg_028 | bio |negative |
|
|
378 +---------------------------+------------+---------+
|
|
379 |HU_neg_034 | bio |negative |
|
|
380 +---------------------------+------------+---------+
|
|
381 |Blanc04 | blank |negative |
|
|
382 +---------------------------+------------+---------+
|
|
383 |Blanc06 | blank |negative |
|
|
384 +---------------------------+------------+---------+
|
|
385 |Blanc09 | blank |negative |
|
|
386 +---------------------------+------------+---------+
|
|
387
|
|
388
|
|
389
|
|
390 | **3) Example of xset.TICs_raw.pdf (Total Ion Chromatograms) :**
|
|
391
|
|
392 .. image:: xcms_tics.png
|
|
393
|
|
394
|
|
395 </help>
|
|
396
|
|
397
|
|
<!-- References to cite when using this tool, each given as a DOI. -->
398 <citations>
|
|
399 <citation type="doi">10.1021/ac051437y</citation>
|
|
400 <citation type="doi">10.1093/bioinformatics/btu813</citation>
|
|
401 </citations>
|
|
402
|
|
403 </tool>
|