comparison abims_xcms_fillPeaks.xml @ 13:91c71f3808f3 draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 9f72e947d9c241d11221cad561f3525d27231857
author lecorguille
date Tue, 18 Sep 2018 16:13:36 -0400
parents dcb9041cb9ea
children 34fe699200ce
comparison
equal deleted inserted replaced
12:dcb9041cb9ea 13:91c71f3808f3
1 <tool id="abims_xcms_fillPeaks" name="xcms.fillPeaks" version="2.1.1"> 1 <tool id="abims_xcms_fillPeaks" name="xcms fillChromPeaks (fillPeaks)" version="@WRAPPER_VERSION@.0">
2 2
3 <description>Integrate a sample's signal in regions where peak groups are not represented to create new peaks in missing areas</description> 3 <description>Integrate areas of missing peaks</description>
4 4
5 <macros> 5 <macros>
6 <import>macros.xml</import> 6 <import>macros.xml</import>
7 <import>macros_xcms.xml</import>
7 </macros> 8 </macros>
8 9
9 <expand macro="requirements"/> 10 <expand macro="requirements"/>
10 <expand macro="stdio"/> 11 <expand macro="stdio"/>
11 12
12 <command><![CDATA[ 13 <command><![CDATA[
13 @COMMAND_XCMS_SCRIPT@ 14 @COMMAND_RSCRIPT@/xcms_fillpeaks.r
14 xfunction fillPeaks 15
15 image '$image' 16 image '$image'
16 17
17 xsetRdataOutput '$xsetRData' 18 ## Advanced
18 19 expandMz $Adv.expandMz
19 method $method 20 expandRt $Adv.expandRt
21 ppm $Adv.ppm
20 22
21 @COMMAND_PEAKLIST@ 23 @COMMAND_PEAKLIST@
22 24
23 @COMMAND_FILE_LOAD@ 25 @COMMAND_FILE_LOAD@
24 26
25 @COMMAND_LOG_EXIT@ 27 @COMMAND_LOG_EXIT@
26 28
27 ]]></command> 29 ]]></command>
28 30
29 <inputs> 31 <inputs>
30 <param name="image" type="data" format="rdata.xcms.group,rdata" label="xset RData file" help="output file from another xcms function (group)" /> 32 <param name="image" type="data" format="rdata.xcms.group,rdata" label="@INPUT_IMAGE_LABEL@" help="@INPUT_IMAGE_HELP@ from groupChromPeaks" />
31 <param name="method" type="select" label="Filling method" help="[method] See the help section below"> 33
32 <option value="chrom" selected="true">chrom</option> 34 <section name="Adv" title="Advanced Options" expanded="False">
33 <option value="MSW" >MSW</option> 35 <param argument="expandMz" type="integer" value="0" label="Value by which the mz width of peaks should be expanded" help="Each peak is expanded in mz direction by ‘expandMz *’ their original mz width. A value of ‘0’ means no expansion, a value of ‘1’ grows each peak by 1 * the mz width of the peak resulting in peaks with twice their original size in mz direction (expansion by half mz width to both sides)." />
34 </param> 36 <param argument="expandRt" type="integer" value="0" label="Value by which the RT width of peaks should be expanded" help="Each peak is expanded in RT direction by ‘expandRt *’ their original RT width. A value of ‘0’ means no expansion, a value of ‘1’ grows each peak by 1 * the RT width of the peak resulting in peaks with twice their original size in RT direction (expansion by half RT width to both sides)."/>
37 <param argument="ppm" type="integer" value="0" label="Specifying a ppm by which the mz width of the peak region should be expanded" help="For peaks with an mz width smaller than ‘mean(c(mzmin, mzmax)) * ppm / 1e6’, the ‘mzmin’ will be replaced by ‘mean(c(mzmin, mzmax)) - (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)’ and ‘mzmax’ by ‘mean(c(mzmin, mzmax)) + (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)’. This is applied before eventually expanding the mz width using the ‘expandMz’ parameter." />
38 </section>
35 39
36 <expand macro="input_peaklist"/> 40 <expand macro="input_peaklist"/>
37 41
38 <expand macro="input_file_load"/> 42 <expand macro="input_file_load"/>
39 43
40 </inputs> 44 </inputs>
41 45
42 <outputs> 46 <outputs>
43 <data name="xsetRData" format="rdata.xcms.fillpeaks" label="${image.name[:-6]}.fillPeaks.RData" /> 47 <data name="xsetRData" format="rdata.xcms.fillpeaks" label="${image.name[:-6]}.fillChromPeaks.RData" from_work_dir="fillpeaks.RData" />
44 <expand macro="output_peaklist" function="fillpeaks" /> 48 <expand macro="output_peaklist" function="fillpeaks" />
45 <data name="log" format="txt" label="xset.log.txt" hidden="true" />
46 </outputs> 49 </outputs>
47 50
48 <tests> 51 <tests>
49 <!--<test> 52 <!--<test>
50 <param name="image" value="xset.group.retcor.group.RData"/> 53 <param name="image" value="xset.group.retcor.group.RData" ftype="rdata"/>
51 <param name="method" value="chrom"/> 54 <param name="method" value="chrom"/>
52 <param name="zip_file" value="sacuri_dir_root.zip" ftype="zip" /> 55 <param name="zip_file" value="sacuri_dir_root.zip" ftype="zip" />
53 <output name="log"> 56 <assert_stdout>
54 <assert_contents> 57 <has_text text="object with 4 samples" />
55 <has_text text="object with 4 samples" /> 58 <has_text text="Time range: 0.2-1140.1 seconds (0-19 minutes)" />
56 <has_text text="Time range: 0.2-1140.1 seconds (0-19 minutes)" /> 59 <has_text text="Mass range: 50.0021-999.9863 m/z" />
57 <has_text text="Mass range: 50.0021-999.9863 m/z" /> 60 <has_text text="Peaks: 199718 (about 49930 per sample)" />
58 <has_text text="Peaks: 199718 (about 49930 per sample)" /> 61 <has_text text="Peak Groups: 48958" />
59 <has_text text="Peak Groups: 48958" /> 62 <has_text text="Sample classes: bio, blank" />
60 <has_text text="Sample classes: bio, blank" /> 63 </assert_stdout>
61 </assert_contents>
62 </output>
63 </test>--> 64 </test>-->
65 <!-- Issue with fillpeaks because it seems that there are too many NA
64 <test> 66 <test>
65 <param name="image" value="faahKO.xset.group.retcor.group.RData"/> 67 <param name="image" value="faahKO.xset.group.retcor.group.RData" ftype="rdata"/>
66 <param name="method" value="chrom"/> 68 <param name="method" value="chrom"/>
67 <conditional name="peaklist"> 69 <conditional name="peaklist">
68 <param name="convertRTMinute" value="false" /> 70 <param name="peaklistBool" value="true" />
69 <param name="peaklistBool" value="true" /> 71 <param name="convertRTMinute" value="false" />
70 <param name="numDigitsMZ" value="4" /> 72 <param name="numDigitsMZ" value="4" />
71 <param name="numDigitsRT" value="1" /> 73 <param name="numDigitsRT" value="1" />
72 </conditional> 74 </conditional>
73 <expand macro="test_file_load_zip"/> 75 <expand macro="test_file_load_zip"/>
74 <output name="log"> 76 <assert_stdout>
75 <assert_contents> 77 <has_text text="object with 4 samples" />
76 <has_text text="object with 4 samples" /> 78 <has_text text="Time range: 2509.2-4480.3 seconds (41.8-74.7 minutes)" />
77 <has_text text="Time range: 2506-4484 seconds (41.8-74.7 minutes)" /> 79 <has_text text="Mass range: 200.1-600 m/z" />
78 <has_text text="Mass range: 200.1-600 m/z" /> 80 <has_text text="Peaks: 32720 (about 8180 per sample)" />
79 <has_text text="Peaks: 32720 (about 8180 per sample)" /> 81 <has_text text="Peak Groups: 8209" />
80 <has_text text="Peak Groups: 8157" /> 82 <has_text text="Sample classes: KO, WT" />
81 <has_text text="Sample classes: KO, WT" /> 83 </assert_stdout>
82 </assert_contents>
83 </output>
84 <output name="variableMetadata" file="faahKO.xset.group.retcor.group.fillPeaks.variableMetadata.tsv" /> 84 <output name="variableMetadata" file="faahKO.xset.group.retcor.group.fillPeaks.variableMetadata.tsv" />
85 <output name="dataMatrix" file="faahKO.xset.group.retcor.group.fillPeaks.dataMatrix.tsv" /> 85 <output name="dataMatrix" file="faahKO.xset.group.retcor.group.fillPeaks.dataMatrix.tsv" />
86 </test> 86 </test>
87 <test> 87 <test>
88 <param name="image" value="faahKO-single.xset.merged.group.retcor.group.RData"/> 88 <param name="image" value="faahKO-single.xset.merged.group.retcor.group.RData" ftype="rdata"/>
89 <param name="method" value="chrom"/> 89 <param name="method" value="chrom"/>
90 <conditional name="peaklist"> 90 <conditional name="peaklist">
91 <param name="convertRTMinute" value="false" /> 91 <param name="peaklistBool" value="true" />
92 <param name="peaklistBool" value="true" /> 92 <param name="convertRTMinute" value="false" />
93 <param name="numDigitsMZ" value="4" /> 93 <param name="numDigitsMZ" value="4" />
94 <param name="numDigitsRT" value="1" /> 94 <param name="numDigitsRT" value="1" />
95 </conditional> 95 </conditional>
96 <expand macro="test_file_load_single"/> 96 <expand macro="test_file_load_single"/>
97 <output name="log"> 97 <assert_stdout>
98 <assert_contents> 98 <has_text text="object with 4 samples" />
99 <has_text text="object with 4 samples" /> 99 <has_text text="Time range: 2509.2-4480.3 seconds (41.8-74.7 minutes)" />
100 <has_text text="Time range: 2506-4484 seconds (41.8-74.7 minutes)" /> 100 <has_text text="Mass range: 200.1-600 m/z" />
101 <has_text text="Mass range: 200.1-600 m/z" /> 101 <has_text text="Peaks: 32720 (about 8180 per sample)" />
102 <has_text text="Peaks: 32720 (about 8180 per sample)" /> 102 <has_text text="Peak Groups: 8209" />
103 <has_text text="Peak Groups: 8157" /> 103 <has_text text="Sample classes: KO, WT" />
104 <has_text text="Sample classes: KO, WT" /> 104 </assert_stdout>
105 </assert_contents>
106 </output>
107 <output name="variableMetadata" file="faahKO.xset.group.retcor.group.fillPeaks.variableMetadata.tsv" /> 105 <output name="variableMetadata" file="faahKO.xset.group.retcor.group.fillPeaks.variableMetadata.tsv" />
108 <output name="dataMatrix" file="faahKO.xset.group.retcor.group.fillPeaks.dataMatrix.tsv" /> 106 <output name="dataMatrix" file="faahKO.xset.group.retcor.group.fillPeaks.dataMatrix.tsv" />
107 </test>-->
108 <!--<test>
109 <param name="image" value="faahKO-single.xset.merged.group2.retcor2.group2.RData" ftype="rdata"/>
110 <conditional name="peaklist">
111 <param name="peaklistBool" value="true" />
112 <param name="convertRTMinute" value="false" />
113 <param name="numDigitsMZ" value="4" />
114 <param name="numDigitsRT" value="1" />
115 <param name="naTOzero" value="false" />
116 </conditional>
117 <expand macro="test_file_load_single"/>
118 <assert_stdout>
119 <has_text text="expandMz: 0" />
120 <has_text text="expandRt: 0" />
121 <has_text text="object with 4 samples" />
122 <has_text text="Time range: 2499.4-4473.6 seconds (41.7-74.6 minutes)" />
123 <has_text text="Mass range: 200.1-600 m/z" />
124 <has_text text="Peaks: 15230 (about 3808 per sample)" />
125 <has_text text="Peak Groups: 6332" />
126 <has_text text="Sample classes: KO, WT" />
127 </assert_stdout>
128 <output name="variableMetadata" file="faahKO.xset.group2.retcor2.group2.fillPeaks2.variableMetadata.tsv" />
129 <output name="dataMatrix" file="faahKO.xset.group2.retcor2.group2.fillPeaks2.dataMatrix.tsv" />
130 </test>-->
131 <test>
132 <param name="image" value="faahKO-single.xset.merged.group2.retcor2.group2.RData" ftype="rdata"/>
133 <conditional name="peaklist">
134 <param name="peaklistBool" value="true" />
135 <param name="convertRTMinute" value="false" />
136 <param name="numDigitsMZ" value="4" />
137 <param name="numDigitsRT" value="1" />
138 </conditional>
139 <expand macro="test_file_load_single"/>
140 <assert_stdout>
141 <has_text text="expandMz: 0" />
142 <has_text text="expandRt: 0" />
143 <has_text text="object with 4 samples" />
144 <has_text text="Time range: 2499.4-4473.6 seconds (41.7-74.6 minutes)" />
145 <has_text text="Mass range: 200.1-600 m/z" />
146 <has_text text="Peaks: 15230 (about 3808 per sample)" />
147 <has_text text="Peak Groups: 6332" />
148 <has_text text="Sample classes: KO, WT" />
149 </assert_stdout>
150 <output name="variableMetadata" file="faahKO.xset.group2.retcor2.group2.fillPeaks2.variableMetadata.tsv" />
151 <output name="dataMatrix" file="faahKO.xset.group2.retcor2.group2.fillPeaks2.dataMatrix.NAless.tsv" />
109 </test> 152 </test>
110 </tests> 153 </tests>
111 154
112 <help><![CDATA[ 155 <help><![CDATA[
113 156
114 @HELP_AUTHORS@ 157 @HELP_AUTHORS@
115 158
116 ============== 159 ===================
117 Xcms.fillPeaks 160 xcms fillChromPeaks
118 ============== 161 ===================
119 162
120 ----------- 163 -----------
121 Description 164 Description
122 ----------- 165 -----------
123 166
124 **Integrate areas of missing peaks** 167 **Integrate areas of missing peaks**
125 For each sample, identify peak groups where that sample is not 168 For each sample, identify peak groups where that sample is not
126 represented. For each of those peak groups, integrate the signal 169 represented. For each of those peak groups, integrate the signal
127 in the region of that peak group and create a new peak. 170 in the region of that peak group and create a new peak.
128 171
129 According to the type of raw-data there are 2
130 different methods available. for filling gcms/lcms data the method
131 "chrom" integrates raw-data in the chromatographic domain, whereas
132 "MSW" is used for peaklists without retention-time information
133 like those from direct-infusion spectra.
134
135 172
136 173
137 ----------------- 174 -----------------
138 Workflow position 175 Workflow position
139 ----------------- 176 -----------------
140 177
141 178
142 **Upstream tools** 179 **Upstream tools**
143 180
144 ========================= ================= ================== ========== 181 ========================= ============================ ==================
145 Name output file format parameter 182 Name Output file Format
146 ========================= ================= ================== ========== 183 ========================= ============================ ==================
147 xcms.group xset.group.RData rdata.xcms.group RData file 184 xcms.groupChromPeaks ``*``.groupChromPeaks.RData rdata.xcms.group
148 ========================= ================= ================== ========== 185 ========================= ============================ ==================
149 186
150 187
151 **Downstream tools** 188 **Downstream tools**
152 189
153 +---------------------------+------------------+-----------------------+ 190 =========================== =========================== =======================
154 | Name | Output file | Format | 191 Name Output file Format
155 +===========================+==================+=======================+ 192 =========================== =========================== =======================
156 |CAMERA.annotate | xset.retcor.RData| rdata.xcms.fillpeaks | 193 CAMERA.annotate ``*``.fillChromPeaks.RData rdata.xcms.fillpeaks
157 +---------------------------+------------------+-----------------------+ 194 --------------------------- --------------------------- -----------------------
158 |xcms.summary | xset.retcor.RData| rdata.xcms.fillpeaks | 195 xcms.process_history ``*``.fillChromPeaks.RData rdata.xcms.fillpeaks
159 +---------------------------+------------------+-----------------------+ 196 =========================== =========================== =======================
160
161 The output file **xset.fillpeaks** is a RData file. You can continue your analysis using it in **CAMERA.annotate** or **xcms.summary** tool as a following step of your workflow.
162 197
163 198
164 **General schema of the metabolomic workflow** 199 **General schema of the metabolomic workflow**
165 200
166 .. image:: xcms_fillpeaks_workflow.png 201 .. image:: xcms_fillpeaks_workflow.png
167 202
168 203 ---------------------------------------------------
169
170 -----------
171 Input files
172 -----------
173
174 +---------------------------+-----------------------+
175 | Parameter : num + label | Format |
176 +===========================+=======================+
177 | 1 : RData file | rdata.xcms.group |
178 +---------------------------+-----------------------+
179
180 204
181 ---------- 205 ----------
182 Parameters 206 Parameters
183 ---------- 207 ----------
184 208
185 209 | See the fillChromPeaks_manual_
186 Method 210
187 ------ 211 .. _fillChromPeaks_manual: https://rdrr.io/bioc/xcms/man/fillChromPeaks.html
188 212
189 **chrom** 213 @HELP_XCMS_MANUAL@
190 214
191 | This method produces intensity values for those missing samples by integrating raw data in peak group region. In a given group, the start and ending retention time points for integration are defined by the median start and end points of the other detected peaks. The start and end m/z values are similarly determined. Intensities can still be zero, which is a rather unusual intensity for a peak. This is the case if e.g. the raw data was thresholded, and the integration area contains no actual raw intensities, or if one sample is miscalibrated, such that the raw data points are (just) outside the integration area. 215 @HELP_PEAKLIST@
192 | Importantly, if retention time correction data is available, the alignment information is used to more precisely integrate the proper region of the raw data. If the corrected retention time is beyond the end of the raw data, the value will be not-a-number (NaN).
193
194 **MSW**
195
196 | "MSW" is used for peaklists without retention-time information like those from direct-infusion spectra.
197
198
199 Get a Peak List
200 ---------------
201
202 If 'true', the module generates two additional files corresponding to the peak list:
203 - the variable metadata file (corresponding to information about extracted ions such as mass or retention time)
204 - the data matrix (corresponding to related intensities)
205
206 **decimal places for [mass or retention time] values in identifiers**
207
208 | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time.
209 | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively.
210 | These parameters do not affect decimal places in columns other than the identifier one.
211
212 **Reported intensity values**
213
214 | This parameter determines which values should be reported as intensities in the dataMatrix table; it corresponds to the xcms 'intval' parameter:
215 | - into: integrated area of original (raw) peak
216 | - maxo: maximum intensity of original (raw) peak
217 | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’)
218 216
219 ------------ 217 ------------
220 Output files 218 Output files
221 ------------ 219 ------------
222 220
223 xset.fillPeaks.RData : rdata.xcms.fillpeaks format 221 xset.fillPeaks.RData : rdata.xcms.fillpeaks format
224 222
225 | Rdata file that will be used in the **CAMERA.annotate** or **xcms.summary** step of the workflow. 223 | Rdata file that will be used in the **CAMERA.annotate** or **xcms.process_history** step of the workflow.
226 224
227 xset.variableMetadata.tsv : tabular format 225 @HELP_PEAKLIST_OUTPUT@
228
229 | Table containing information about ions; can be used as one input of **Quality_Metrics** or **Generic_filter** modules.
230
231 xset.dataMatrix.tsv : tabular format
232
233 | Table containing ions' intensities; can be used as one input of **Quality_Metrics** or **Generic_filter** modules.
234
235 ------
236
237 .. class:: infomark
238
239 The output file is a xset.fillPeaks.RData file. You can continue your analysis using it in **CAMERA.annotate** or **xcms.summary** tool.
240
241
242 ---------------------------------------------------
243
244 ---------------
245 Working example
246 ---------------
247
248 Input files
249 -----------
250
251 | RData file -> **xset.retcor.RData**
252
253 Parameters
254 ----------
255
256 | method -> **chrom**
257 | Get a Peak List -> **false**
258
259
260 Output files
261 ------------
262
263 | **xset.fillPeaks.RData: RData file**
264 226
265 227
266 --------------------------------------------------- 228 ---------------------------------------------------
267 229
268 Changelog/News 230 Changelog/News
269 -------------- 231 --------------
270 232
233 **Version 3.0.0.0 - 08/03/2018**
234
235 - UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. This involved refactoring much of the underlying code and methods. Some parameters may have been renamed.
236
237 - UPDATE: since xcms 3.0.0, selecting a method (chrom or MSW) is no longer needed. xcms will detect from the data the peak picking method used in findChromPeaks
238
239 - UPDATE: since xcms 3.0.0, new parameters are available: expandMz, expandRt and ppm
240
241
271 **Version 2.1.1 - 29/11/2017** 242 **Version 2.1.1 - 29/11/2017**
272 243
273 - BUGFIX: To avoid issues with accented letters in the parentFile tag of the mzXML files, we changed a hidden mechanism to LC_ALL=C 244 - BUGFIX: To avoid issues with accented letters in the parentFile tag of the mzXML files, we changed a hidden mechanism to LC_ALL=C
274 245
246
275 **Version 2.1.0 - 07/02/2017** 247 **Version 2.1.0 - 07/02/2017**
276 248
277 - IMPROVEMENT: change the management of the peaklist ids. The main ids remain the same as xcms generated. The export settings now only add custom names in the variableMetadata tab (namecustom) 249 - IMPROVEMENT: change the management of the peaklist ids. The main ids remain the same as xcms generated. The export settings now only add custom names in the variableMetadata tab (namecustom)
278 250
279 - IMPROVEMENT: xcms.fillpeaks can deal with merged individual data 251 - IMPROVEMENT: xcms.fillpeaks can deal with merged individual data
280 252
253
281 **Version 2.0.8 - 22/12/2016** 254 **Version 2.0.8 - 22/12/2016**
282 255
283 - IMPROVEMENT: Add an option to export the peak list at this step without having to wait for CAMERA.annotate 256 - IMPROVEMENT: Add an option to export the peak list at this step without having to wait for CAMERA.annotate
284 257
258
285 **Version 2.0.7 - 06/07/2016** 259 **Version 2.0.7 - 06/07/2016**
286 260
287 - UPGRADE: upgrade the xcms version from 1.44.0 to 1.46.0 261 - UPGRADE: upgrade the xcms version from 1.44.0 to 1.46.0
288 262
263
289 **Version 2.0.6 - 04/04/2016** 264 **Version 2.0.6 - 04/04/2016**
290 265
291 - TEST: refactoring to pass planemo test using conda dependencies 266 - TEST: refactoring to pass planemo test using conda dependencies
292 267
268
293 **Version 2.0.5 - 10/02/2016** 269 **Version 2.0.5 - 10/02/2016**
294 270
295 - BUGFIX: better management of errors. Datasets remained green although the process failed 271 - BUGFIX: better management of errors. Datasets remained green although the process failed
296 272
297 - UPDATE: refactoring of internal management of inputs/outputs 273 - UPDATE: refactoring of internal management of inputs/outputs