comparison BC/batch_correction.xml @ 4:23314e1192d4 draft default tip

Uploaded
author melpetera
date Thu, 14 Jan 2021 09:56:58 +0000
parents
children
comparison
equal deleted inserted replaced
3:73892ef177e3 4:23314e1192d4
1 <tool id="Batch_correction" name="Batch_correction" version="3.0.0">
2 <description>Corrects intensities for signal drift and batch-effects</description>
3
4 <macros>
5 <import>macros.xml</import>
6 </macros>
7
<!-- Package dependencies of the tool; each one is pinned to an exact version. -->
8 <requirements>
9 <requirement type="package" version="1.1_4">r-batch</requirement>
10 <requirement type="package" version="1.7_8">r-ade4</requirement>
11 <requirement type="package" version="1.70.0">bioconductor-pcamethods</requirement>
12 <requirement type="package" version="1.10.0">bioconductor-ropls</requirement>
13 </requirements>
14
<!-- Error detection: any exit code of 1 or above marks the job as a fatal failure. -->
15 <stdio>
16 <exit_code range="1:" level="fatal" />
17 </stdio>
18
<!--
  Command line (Cheetah template).
  * Methods 'all_loess_pool' and 'all_loess_sample' call batch_correction_all_loess_wrapper.R
    with the three input tables, the method name and the span.
  * Every other method (linear, lowess, loess) calls batch_correction_3Lwrapper.R with
    analyse "batch_correction", the three input tables, the method, the span ("none" when the
    method is linear) and the valnull, ref_factor and detail options.
  * All branches then receive the four output paths, followed by the @SM_CUSTOM@ token
    (presumably expanded from macros.xml; confirm there).
  The script paths are single-quoted so that a tool directory containing spaces or shell
  metacharacters does not split or alter the Rscript argument.
-->
19 <command><![CDATA[
20 #if str($span_condition.method) == 'all_loess_pool':
21 Rscript '$__tool_directory__/batch_correction_all_loess_wrapper.R'
22 dataMatrix "$dataMatrix"
23 sampleMetadata "$sampleMetadata"
24 variableMetadata "$variableMetadata"
25 method "all_loess_pool"
26 span "${span_condition.span}"
27
28 #elif str($span_condition.method) == 'all_loess_sample':
29 Rscript '$__tool_directory__/batch_correction_all_loess_wrapper.R'
30 dataMatrix "$dataMatrix"
31 sampleMetadata "$sampleMetadata"
32 variableMetadata "$variableMetadata"
33 method "all_loess_sample"
34 span "${span_condition.span}"
35 #else:
36 Rscript '$__tool_directory__/batch_correction_3Lwrapper.R'
37 analyse "batch_correction"
38 dataMatrix "$dataMatrix"
39 sampleMetadata "$sampleMetadata"
40 variableMetadata "$variableMetadata"
41 method "${span_condition.method}"
42 #if str($span_condition.method) == 'linear':
43 span "none"
44 #else:
45 span "${span_condition.span}"
46 #end if
47 valnull "${span_condition.valnull}"
48 ref_factor "${span_condition.ref_factor}"
49 detail "${span_condition.detail}"
50 #end if
51 dataMatrix_out "$dataMatrix_out" variableMetadata_out "$variableMetadata_out"
52 graph_output "$graph_output" rdata_output "$rdata_output"
53 @SM_CUSTOM@
54 ]]></command>
55
<!--
  Tool inputs.
  * Three tabular datasets: data matrix, sample metadata and variable metadata.
  * The "sm_customisation" macro is expanded here (its content is defined outside this file).
  * The "span_condition" conditional selects the regression method. linear expands
    "lll_options" only; lowess and loess expand "lll_span" and "lll_options"; the two
    all_loess_* methods expose a single float "span" parameter.
  The all_loess_* span parameters carry an in_range validator with an exclusive lower bound
  of 0, so the "must be > 0" constraint stated in their help text is enforced when the form
  is submitted.
-->
56 <inputs>
57 <param name="dataMatrix" label="Data matrix file " format="tabular" type="data" />
58 <param name="sampleMetadata" label="Sample metadata file " format="tabular" type="data" help="must contain at least the three following columns: 'batch' + 'injectionOrder' + 'sampleType'"/>
59 <param name="variableMetadata" label="Variable metadata file " format="tabular" type="data" />
60
61 <expand macro="sm_customisation"/>
62
63 <conditional name="span_condition">
64 <param name="method" label="Type of regression model " type="select" help="To select between linear or non-linear (lowess or loess) methods to be used in Van der Kloet algorithm ; when using loess, you can choose to use pools or samples to model batch effect.">
65 <option value="linear">linear</option>
66 <option value="lowess">lowess</option>
67 <option value="loess">loess</option>
68 <option value="all_loess_pool">all loess pool</option>
69 <option value="all_loess_sample">all loess sample</option>
70 </param>
71 <when value="linear">
72 <expand macro="lll_options"/>
73 </when>
74 <when value="lowess">
75 <expand macro="lll_span"/>
76 <expand macro="lll_options"/>
77 </when>
78 <when value="loess">
79 <expand macro="lll_span"/>
80 <expand macro="lll_options"/>
81 </when>
82 <when value="all_loess_pool">
83 <param name="span" type="float" value="1" label="span" help="smoothing parameter; must be > 0"><validator type="in_range" min="0" exclude_min="true" message="span must be strictly greater than 0"/></param>
84 </when>
85 <when value="all_loess_sample">
86 <param name="span" type="float" value="1" label="span" help="smoothing parameter; must be > 0"><validator type="in_range" min="0" exclude_min="true" message="span must be strictly greater than 0"/></param>
87 </when>
88 </conditional>
89 </inputs>
90
<!--
  Tool outputs. Labels are built from the selected method plus either the input dataset
  name (tabular outputs) or the tool name (graph and rdata outputs).
  The rdata output is only kept when the method is 'all_loess_pool' or 'all_loess_sample'.
-->
91 <outputs>
92 <data name="dataMatrix_out" label="BC_${span_condition.method}_${dataMatrix.name}" format="tabular"/>
93 <data name="variableMetadata_out" label="BC_${span_condition.method}_${variableMetadata.name}" format="tabular"/>
94 <data name="graph_output" label="${tool.name}_${span_condition.method}_graph" format="pdf"/>
95 <data name="rdata_output" label="${tool.name}_${span_condition.method}_rdata" format="rdata">
96 <filter>span_condition['method'] == 'all_loess_pool' or span_condition['method'] == 'all_loess_sample'</filter>
97 </data>
98 </outputs>
99
<!--
  Functional tests.
  Test 1: 'all_loess_pool' method with span 1 on the reference input tables; the corrected
          data matrix is checked against output-batchcorrection-dataMatrix.tsv.
  Test 2: 'loess' method with span 1 on a sample metadata file using a custom sample type
          column name (MySampType) and custom pool/blank/sample tags (lot/blanc/echant); the
          corrected data matrix is checked against output-batchcorrection-dataMatrix-3L.tsv.
-->
100 <tests>
101 <test>
102 <param name="dataMatrix" value="input-batchcorrection-dataMatrix.tsv"/>
103 <param name="sampleMetadata" value="input-batchcorrection-sampleMetadata.tsv"/>
104 <param name="variableMetadata" value="input-batchcorrection-variableMetadata.tsv"/>
105 <param name="method" value="all_loess_pool"/>
106 <param name="span" value="1"/>
107 <output name="dataMatrix_out" file="output-batchcorrection-dataMatrix.tsv"/>
108 </test>
109 <test>
110 <param name="dataMatrix" value="input-batchcorrection-dataMatrix.tsv"/>
111 <param name="sampleMetadata" value="input-batchcorrection-sampleMetadata-customSampleType.tsv"/>
112 <param name="variableMetadata" value="input-batchcorrection-variableMetadata.tsv"/>
113 <param name="method" value="loess"/>
114 <param name="span" value="1"/>
115 <param name="sample_type_col_name" value="MySampType"/>
116 <param name="sampleTypeTagPool" value="lot"/>
117 <param name="sampleTypeTagBlank" value="blanc"/>
118 <param name="sampleTypeTagSample" value="echant"/>
119 <output name="dataMatrix_out" file="output-batchcorrection-dataMatrix-3L.tsv"/>
120 </test>
121 </tests>
122
123 <help>
124
125 .. class:: infomark
126
127 **Authors**
128 | Jean-Francois Martin - PF MetaToul-AXIOM ; INRAE ; MetaboHUB (for original version of this tool and overall development of the R script)
129 | Melanie Petera - PFEM ; INRAE ; MetaboHUB (for R wrapper and R script improvement regarding "linear/lowess/loess" methods)
130 | Marion Landi - FLAME ; PFEM (for original xml interface and R wrapper)
131 | Franck Giacomoni - PFEM ; INRAE ; MetaboHUB (for original xml interface and R wrapper)
132 | Etienne Thevenot - LIST/LADIS ; CEA ; MetaboHUB (for R script and wrapper regarding "all loess pool" and "all loess sample" methods)
133
134 ---------------------------------------------------
135
136 .. class:: infomark
137
138 **Please cite** If you use this tool, please cite:
139
140 when using the **linear**, **lowess** or **loess** methods:
141 | `F.M. Van Der Kloet, I. Bobeldijk, E.R. Verheij, R.H. Jellema. (2009). "Analytical error reduction using single point calibration for accurate and precise metabolomic phenotyping." Journal of Proteome Research p5132-5141 &lt;http://www.ncbi.nlm.nih.gov/pubmed/19754161&gt;`_
142
143 when using the **all loess pool** or **all loess sample** method:
144 | `Dunn et al (2011). Procedures for large-scale metabolic profiling of serum and plasma using gas chromatography and liquid chromatography coupled to mass spectrometry. Nature Protocols, 6:1060-1083 &lt;http://dx.doi.org/10.1038/nprot.2011.335&gt;`_
145 | Cleveland et al (1997). In Statistical Models in S; Chambers JM. and Hastie TJ. Ed.; Chapman and Hall: London; pp. 309-376
146 | Etienne A. Thevenot, Aurelie Roux, Ying Xu, Eric Ezan, and Christophe Junot (2015). Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses. *Journal of Proteome Research*, **14**:3322-3335 (http://dx.doi.org/10.1021/acs.jproteome.5b00354).
147
148 ---------------------------------------------------
149
150 .. class:: infomark
151
152 **Tool updates**
153
154 See the **NEWS** section at the bottom of this page
155
156 ---------------------------------------------------
157
158 ================
159 Batch_correction
160 ================
161
162 |
163
164 -----------
165 Description
166 -----------
167
168 | **Instrumental drift** and **offset differences** between batches have been described in **LC-MS** experiments when the number of samples is large and/or multiple batches of acquisition are needed.
169 | Recently a normalization strategy relying on the measurements of a **pooled** (or QC) sample injected periodically has been described: for each variable, a **regression model** is fitted to the values of the **pool** and subsequently used to adjust the intensities of the samples of interest (van der Kloet et al, 2009; Dunn et al, 2011).
170 |
171 | The current tool implements **two strategies** which differ in the way the regression model is applied to the variables (either depending on variable quality metrics, or 'loess' model for all variables) and also in the generated figure.
172 |
173
174
175 -----------------
176 Workflow position
177 -----------------
178
179 .. image:: batch_correction.png
180 :width: 800
181
182
183 -----------
184 Input files
185 -----------
186
187 +----------------------------+------------+
188 | Parameter : num + label | Format |
189 +============================+============+
190 | 1 : Data Matrix file | tabular |
191 +----------------------------+------------+
192 | 2 : Sample metadata file | tabular |
193 +----------------------------+------------+
194 | 3 : Variable metadata file | tabular |
195 +----------------------------+------------+
196
197 |
198
199 Data Matrix file must contain the intensity values of variables.
200 | First line must contain all the samples' names
201 | First column must contain all the variables' ID
202
203
204 Sample metadata file must contain at least the three following columns:
205 | - a batch column (default to "*batch*") to identify the batches of analyses
206 | - an injection order column (default to "*injectionOrder*") composed of integers defining the injection order of samples
207 | - a sample type column (default to "*sampleType*") indicating if a sample is a biological one ("*sample*"), a QC-pool ("*pool*") or a blank ("*blank*")
208 | *Default values* can be changed according to your data coding using the customisation parameters in the "**Sample metadata file coding parameters**" section.
209
210
211 **Notes concerning your design:**
212 | - the 3 mandatory columns must not contain NA
213 | - your data should contain at least 3 QC-pools in each batch for intra-batch **linear** adjustment and 8 for **lo(w)ess** adjustment (minimum of 5 for **all loess** methods)
214
215
216
217 ----------
218 Parameters
219 ----------
220
221
222 Sample metadata file coding parameters
223 | Lets you specify the names of the columns in the sample metadata table that contain the injection order, the batches and the sample types.
224 | Also lets you specify the sample type coding used in the sample type column.
225 |
226
227 Type of regression model
228 | To choose between *linear*, *lowess*, *loess*, *all loess pool*, and *all loess sample* strategies
229 | **- Option 1** (**linear**, **lowess**, and **loess** methods): before the normalisation of each variable, some quality metrics are computed (see the "Determine Batch Correction" module); depending on the result, the variable can be normalized or not, with either the **linear**, **lowess** or **loess** model.
230 | **- Option 2** (**all loess pool** and **all loess sample**): each variable is normalized by using the 'loess' model;
231 | in the case **all loess pool** is chosen and the number of pool observations is below 5, the linear method is used (for all variables) and a warning is generated;
232 | if the pool intensities are not representative of the samples (which can be viewed on the figure where both trends are shown), the case **all loess sample** enables using the sample intensities (instead of the pool intensities) as the reference for the loess curve.
233 | In all "option 2" cases: the **median intensity of the reference observations** (either 'pool' or 'sample') is used as the scaling factor after the initial intensities have been divided by the loess predictions.
234 |
235
236 Span
237 | Smoothing parameter, advanced option for *lo(w)ess* and *all loess* methods
238 | In case of a loess fit, the **span** parameter (between 0 and 1) controls the smoothing
239 | (the higher the smoother; higher values are preferred to avoid overfitting; Cleveland et al, 1997).
240 |
241
242 Unconsistant values
243 | available for regression model *linear*, *lowess* and *loess*
244 | Controls what is done regarding negative or infinite values that can be generated during regression estimation.
245 | *Prevent it* will change the normalisation term leading to an unconsistant value to prevent it:
246 | when the intra-batch denominator term is below 1, it is replaced by the smallest value above 1 obtained in the concerned batch.
247 | *Consider it as a missing value* will switch concerned intensities to NA;
248 | this option implies that concerned ions will not be considered in PCA display.
249 | *Consider it as a null intensity* will switch concerned intensities to 0.
250 |
251
252 Factor of interest
253 | available for regression model *linear*, *lowess* and *loess*
254 | Name of the factor (column header) that will be used as a categorical variable for design plots (often a biological factor ; if none, put the batch column name).
255 | This factor does not affect correction calculation.
256 |
257
258 Level of details for plots
259 | available for regression model *linear*, *lowess* and *loess*
260 | *basic*: Sum of intensities + PCA + CV boxplot (before and after correction)
261 | *standard*: 'basic' plots + before/after-correction plots of intensities over injection order, and design effects for each ion
262 | *complete*: 'standard' plots + QC-pool regression plots per batch with samples' intensities over injection order
263 | This parameter is not used by the *all loess* methods where a unique figure is generated showing the sum of intensities along injection order, and the first 4 PCA scores.
264 |
265
266
267 ------------
268 Output files
269 ------------
270
271 Batch_correction_$method_rdata.rdata ('all_loess' methods only)
272 | binary data
273 | Download, open R and use the 'load' function; objects are in the 'res' list
274 |
275
276 Batch_correction_$method_graph.pdf
277 | graphical output
278 | For the *linear* and *lo(w)ess* methods, content depends on level of details chosen
279 |
280
281 Batch_correction_$method_variableMetadata.tabular
282 | tsv output
283 | Identical to the variable metadata input file, with x more columns (where x is the number of batches) in case of *linear*, *lowess* and *loess* methods
284 |
285
286 Batch_correction_$method_dataMatrix.tabular
287 | tsv output (tabulated)
288 | Same formatting as the data matrix file; contains corrected intensities
289 |
290
291
292 ---------------------------------------------------
293
294 ----------------------
295 Additional information
296 ----------------------
297
298 .. class:: infomark
299
300 Refer to the corresponding "W4M HowTo" page:
301 | `MS data processing - Filters and normalisation &lt;https://download.workflow4metabolomics.org/docs/w4e2018/2018-10_EC_W4E%20-%20Dataprocessing_Filter_and_normalisation.pdf&gt;`_
302 |
303
304 See also the reference history:
305 | `W4M00001_Sacurine-statistics (DOI:10.15454/1.4811121736910142E12) &lt;http://dx.doi.org/10.15454/1.4811121736910142E12&gt;`_
306
307
308 ---------------------------------------------------
309
310 ----
311 NEWS
312 ----
313
314 CHANGES IN VERSION 3.0.0
315 ========================
316
317 NEW FEATURES
318
319 | - Specific names for the 'sampleType', 'injectionOrder', and 'batch' from sampleMetadata are now available in a dedicated parameter section
320 | - Addition of a sum of ions before/after plot for linear/lowess/loess methods
321 | - Addition of a third option in "Null values" parameter (renamed "unconsistant values") in linear/lowess/loess methods
322 | - linear/lowess/loess methods now handle NA in intensities and allow "blank" samples in the dataset
323 |
324
325 INTERNAL MODIFICATIONS
326
327 | - XML optimisation using macros
328 | - Output name changes
329 | - linear/lowess/loess methods: disabling of RData output
330 | - linear/lowess/loess methods: split of tool-linked code and script-linked one
331 | - linear/lowess/loess methods: adjustments in the normalisation process to match matters linked to NA acceptance
332 | - linear/lowess/loess methods: better handling of special characters in IDs and column names
333 |
334
335 CHANGES IN VERSION 2.2.4
336 ========================
337
338 INTERNAL MODIFICATIONS
339
340 Fixed bug for pool selection ("all_loess" methods)
341
342 CHANGES IN VERSION 2.2.2
343 ========================
344
345 INTERNAL MODIFICATIONS
346
347 Fixed bug for color plot ("all_loess" methods)
348
349 CHANGES IN VERSION 2.2.0
350 ========================
351
352 NEW FEATURE
353
354 Specific names for the 'sampleType', 'injectionOrder', and 'batch' from sampleMetadata can be selected by the user (for compatibility with the MTBLS downloader)
355
356 CHANGES IN VERSION 2.1.2
357 ========================
358
359 INTERNAL MODIFICATIONS
360
361 Minor modifications in config file
362
363 CHANGES IN VERSION 2.1.0
364 ========================
365
366 INTERNAL MODIFICATIONS
367
368 For PCA figure display only (**all_loess** options): missing values are set to the minimum value before PCA computation is performed (with svd)
369
370 Additional running and installation tests added with planemo, conda, and travis
371
372 BUG FIX
373
374 Variables with NA or 0 values in all reference samples are discarded before applying the **all_loess** normalization
375
376 INTERNAL MODIFICATIONS
377
378 Modifications of the **all_loess_wrapper** file to handle the recent **ropls** package versions (i.e. 1.3.15 and above) which use S4 classes
379
380 </help>
381 <!-- [RECOMMENDED] All citations associated to this tool (main citation given above and other references). Can be extracted from the history panel -->
382 <citations>
383 <!-- [HELP] As DOI or BibTex entry -->
384 <citation type="doi">10.1021/pr900499r</citation>
385 <citation type="doi">10.1038/nprot.2011.335</citation>
<!-- NOTE(review): this entry gives the year as 1991, whereas the help text of this tool cites the same reference as "Cleveland et al (1997)"; confirm which year is intended. -->
386 <citation type="bibtex">@ARTICLE{Cleveland91,
387 author = {Cleveland et al},
388 year = {1991},
389 journal = {Statistical Models in S, Chambers JM. and Hastie TJ. Ed., Chapman et Hall: London},
390 title = {Local Regression Models},
391 pages = {309-376},
392 editor = {Chambers JM. and Hastie TJ. Ed.},
393 publisher = {Chapman et Hall: London},
394 chapter = {8}
395 }</citation>
396 <citation type="doi">10.1021/acs.jproteome.5b00354</citation>
397 </citations>
398
399
400 </tool>