comparison batchcorrection-57edfd3943ab/batch_correction.xml @ 3:73892ef177e3 draft

Uploaded
author melpetera
date Tue, 02 May 2017 09:47:22 -0400
parents
children
comparison
equal deleted inserted replaced
2:016780b192a6 3:73892ef177e3
1 <tool id="Batch_correction" name="Batch_correction" version="2.1.2">
2 <description>Corrects intensities for signal drift and batch-effects</description>
3 <!-- Packages this tool declares as dependencies: r-batch, r-ade4, bioconductor-pcamethods and bioconductor-ropls. -->
4 <requirements>
5 <requirement type="package">r-batch</requirement>
6 <requirement type="package">r-ade4</requirement>
7 <requirement type="package">bioconductor-pcamethods</requirement>
8 <requirement type="package">bioconductor-ropls</requirement>
9 </requirements>
10 <!-- Error detection: any exit code of 1 or above is reported at the "fatal" level. -->
11 <stdio>
12 <exit_code range="1:" level="fatal" />
13 </stdio>
14 <!-- Command line (Cheetah template). The 'all_loess_pool' and 'all_loess_sample' methods call batch_correction_all_loess_wrapper.R with the three input files, the method name and the span. Every other method calls batch_correction_wrapper.R, which additionally receives valnull, ref_factor and detail; span is passed as "none" for the linear method, which has no span parameter. The four output paths are appended in all cases. The script paths are single-quoted so that a tool directory containing spaces or shell metacharacters is still passed to Rscript as one argument. -->
15 <command><![CDATA[
16 #if str($span_condition.method) == 'all_loess_pool':
17 Rscript '$__tool_directory__/batch_correction_all_loess_wrapper.R'
18 dataMatrix "$dataMatrix"
19 sampleMetadata "$sampleMetadata"
20 variableMetadata "$variableMetadata"
21 method "all_loess_pool"
22 span "${span_condition.span}"
23
24 #elif str($span_condition.method) == 'all_loess_sample':
25 Rscript '$__tool_directory__/batch_correction_all_loess_wrapper.R'
26 dataMatrix "$dataMatrix"
27 sampleMetadata "$sampleMetadata"
28 variableMetadata "$variableMetadata"
29 method "all_loess_sample"
30 span "${span_condition.span}"
31 #else:
32 Rscript '$__tool_directory__/batch_correction_wrapper.R'
33 analyse "batch_correction"
34 dataMatrix "$dataMatrix"
35 sampleMetadata "$sampleMetadata"
36 variableMetadata "$variableMetadata"
37 method "${span_condition.method}"
38 #if str($span_condition.method) == 'linear':
39 span "none"
40 #else:
41 span "${span_condition.span}"
42 #end if
43 valnull "${span_condition.valnull}"
44 ref_factor "${span_condition.ref_factor}"
45 detail "${span_condition.detail}"
46 #end if
47 dataMatrix_out "$dataMatrix_out" variableMetadata_out "$variableMetadata_out"
48 graph_output "$graph_output" rdata_output "$rdata_output"
49 ]]></command>
50 <!-- Inputs: three tabular files plus a conditional keyed on the regression method. The linear, lowess and loess branches expose null-value handling, the factor of interest and the plot detail level (lowess and loess also expose span); the all_loess_pool and all_loess_sample branches expose span only. -->
51 <inputs>
52 <param name="dataMatrix" label="Data Matrix file " format="tabular" type="data" />
53 <param name="sampleMetadata" label="Sample metadata file " format="tabular" type="data" help="must contain at least the three following columns: 'batch' + 'injectionOrder' + 'sampleType'"/>
54 <param name="variableMetadata" label="Variable metadata file " format="tabular" type="data" />
55
56 <conditional name="span_condition">
57 <param name="method" label="Type of regression model " type="select" help="To select between linear or non-linear (lowess or loess) methods to be used in Van der Kloet algorithm ; when using loess, you can choose to use pools or samples to model batch effect.">
58 <option value="linear">linear</option>
59 <option value="lowess">lowess</option>
60 <option value="loess">loess</option>
61 <option value="all_loess_pool">all loess pool</option>
62 <option value="all_loess_sample">all loess sample</option>
63 </param>
64 <when value="linear">
65 <param name="valnull" label="Null values" type="select" display="radio" help="What to do with generated negative or infinite values">
66 <option value="0">consider it as a null intensity</option>
67 <option value="NA">consider it as a missing value</option>
68 </param>
69 <param name="ref_factor" label="Factor of interest " type="text" value="batch" help="column name of factor of interest (often a biological factor); if none, leave 'batch'" />
70 <param name="detail" label="Level of details for plots " type="select" help="Amount of plots in the pdf file output. See Help section for more details.">
71 <option value="no">basic</option>
72 <option value="plot">standard</option>
73 <option value="reg">complete</option>
74 </param>
75 </when>
76 <when value="lowess">
77 <param name="span" type="float" value="0.85" label="span" help="it is an advanced option. Must be less than or equal to 1"/>
78 <param name="valnull" label="Null values" type="select" display="radio" help="What to do with generated negative or infinite values">
79 <option value="0">consider it as a null intensity</option>
80 <option value="NA">consider it as a missing value</option>
81 </param>
82 <param name="ref_factor" label="Factor of interest " type="text" value="batch" help="column name of factor of interest (often a biological factor); if none, leave 'batch'" />
83 <param name="detail" label="Level of details for plots " type="select" help="Amount of plots in the pdf file output. See Help section for more details.">
84 <option value="no">basic</option>
85 <option value="plot">standard</option>
86 <option value="reg">complete</option>
87 </param>
88 </when>
89 <when value="loess">
90 <param name="span" type="float" value="1" label="span" help="it is an advanced option. Must be strictly greater than 0"/>
91 <param name="valnull" label="Null values" type="select" display="radio" help="What to do with generated negative or infinite values">
92 <option value="0">consider it as a null intensity</option>
93 <option value="NA">consider it as a missing value</option>
94 </param>
95 <param name="ref_factor" label="Factor of interest " type="text" value="batch" help="column name of factor of interest (often a biological factor); if none, leave 'batch'" />
96 <param name="detail" label="Level of details for plots " type="select" help="Amount of plots in the pdf file output. See Help section for more details.">
97 <option value="no">basic</option>
98 <option value="plot">standard</option>
99 <option value="reg">complete</option>
100 </param>
101 </when>
102 <when value="all_loess_pool">
103 <param name="span" type="float" value="1" label="span" help="smoothing parameter; must be > 0"/>
104 </when>
105 <when value="all_loess_sample">
106 <param name="span" type="float" value="1" label="span" help="smoothing parameter; must be > 0"/>
107 </when>
108 </conditional>
109 </inputs>
110 <!-- Outputs: the corrected data matrix and the variable metadata (both tabular), a PDF of plots and an RData file. Each label is built from the tool name and the selected method; the two tabular labels also carry the name of the corresponding input dataset. -->
111 <outputs>
112 <data name="dataMatrix_out" format="tabular" label="${tool.name}_${span_condition.method}_${dataMatrix.name}"/>
113 <data name="variableMetadata_out" format="tabular" label="${tool.name}_${span_condition.method}_${variableMetadata.name}"/>
114 <data name="graph_output" format="pdf" label="${tool.name}_${span_condition.method}_graph"/>
115 <data name="rdata_output" format="rdata" label="${tool.name}_${span_condition.method}_rdata"/>
116 </outputs>
117 <!-- Functional test: runs the 'all_loess_pool' method with span 1 on the three input files; only dataMatrix_out is compared with an expected file. -->
118 <tests>
119 <test>
120 <param name="dataMatrix" value="input-batchcorrection-dataMatrix.tsv"/>
121 <param name="sampleMetadata" value="input-batchcorrection-sampleMetadata.tsv"/>
122 <param name="variableMetadata" value="input-batchcorrection-variableMetadata.tsv"/>
123 <param name="method" value="all_loess_pool"/>
124 <param name="span" value="1"/>
125 <output name="dataMatrix_out" file="output-batchcorrection-dataMatrix.tsv"/>
126 </test>
127 </tests>
128
129
130 <help>
131
132 .. class:: infomark
133
134 **Authors**
135 | Jean-Francois Martin - PF MetaToul-AXIOM ; INRA ; MetaboHUB (for original version of this tool and overall development of the R script)
136 | Melanie Petera - PFEM ; INRA ; MetaboHUB (for R wrapper and R script improvement)
137 | Marion Landi - FLAME ; PFEM ; INRA ; MetaboHUB (for xml interface and R wrapper)
138 | Franck Giacomoni - PFEM ; INRA ; MetaboHUB (for xml interface and R wrapper)
139 | Etienne Thevenot - LIST/LADIS ; CEA ; MetaboHUB (for R script and wrapper regarding "all loess pool" and "all loess sample" methods)
140
141 ---------------------------------------------------
142
143 .. class:: infomark
144
145 **Please cite** If you use this tool, please cite:
146
147 when using the **linear**, **lowess** or **loess** methods:
148 | `F.M. Van Der Kloet, I. Bobeldijk, E.R. Verheij, R.H. Jellema. (2009). "Analytical error reduction using single point calibration for accurate and precise metabolomic phenotyping." Journal of Proteome Research p5132-5141 &lt;http://www.ncbi.nlm.nih.gov/pubmed/19754161&gt;`_
149
150 when using the **all loess pool** or **all loess sample** method:
151 | `Dunn et al (2011). Procedures for large-scale metabolic profiling of serum and plasma using gas chromatography and liquid chromatography coupled to mass spectrometry. Nature Protocols, 6:1060-1083 &lt;http://dx.doi.org/10.1038/nprot.2011.335&gt;`_
152 | Cleveland et al (1997). In Statistical Models in S; Chambers JM. and Hastie TJ. Ed.; Chapman et Hall: London; pp. 309-376
153 | Etienne A. Thevenot, Aurelie Roux, Ying Xu, Eric Ezan, and Christophe Junot (2015). Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses. *Journal of Proteome Research*, **14**:3322-3335 (http://dx.doi.org/10.1021/acs.jproteome.5b00354).
154
155 ---------------------------------------------------
156
157 .. class:: infomark
158
159 **Tool updates**
160
161 See the **NEWS** section at the bottom of this page
162
163 ---------------------------------------------------
164
165 ================
166 Batch_correction
167 ================
168
169 -----------
170 Description
171 -----------
172
173 | **Instrumental drift** and **offset differences** between batches have been described in **LC-MS** experiments when the number of samples is large and/or multiple batches of acquisition are needed.
174 | Recently a normalization strategy relying on the measurements of a **pooled** (or QC) sample injected periodically has been described: for each variable, a **regression model** is fitted to the values of the **pool** and subsequently used to adjust the intensities of the samples of interest (van der Kloet et al, 2009; Dunn et al, 2011).
175 |
176 | The current tool implements **two strategies** which differ in the way the regression model is applied to the variables (either depending on variable quality metrics, or 'loess' model for all variables) and also in the generated figure.
177 |
178
179
180 -----------------
181 Workflow position
182 -----------------
183
184 .. image:: batch_correction.png
185 :width: 800
186
187
188 -----------
189 Input files
190 -----------
191
192 +----------------------------+------------+
193 | Parameter : num + label | Format |
194 +============================+============+
195 | 1 : Data Matrix file | tabular |
196 +----------------------------+------------+
197 | 2 : Sample metadata file | tabular |
198 +----------------------------+------------+
199 | 3 : Variable metadata file | tabular |
200 +----------------------------+------------+
201
202
203 Data Matrix file must contain the intensity values of variables.
204 | First line must contain all the samples' names
205 | First column must contain all the variables' ID
206 |
207
208 Sample metadata file must contain at least the three following columns:
209 | "batch" to identify the batches of analyses
210 | "injectionOrder" (integers) defining the injection order of all samples (QC-pools as well as analysed samples)
211 | "sampleType" indicating whether each injection is a sample ("sample") or a QC-pool ("pool"); each batch needs
212 | at least 3 QC-pools for intra-batch linear adjustment and 8 for lo(w)ess adjustment (5 for **all loess** methods)
213
214
215 .. class:: warningmark
216
217 MISSING DATA are allowed only with the **all loess** methods
218
219
220 ----------
221 Parameters
222 ----------
223
224 Type of regression model
225 | To choose between *linear*, *lowess*, *loess*, *all loess pool*, and *all loess sample* strategies
226 | **- Option 1** (**linear**, **lowess**, and **loess** methods): before the normalisation of each variable, some quality metrics are computed (see the "Determine Batch Correction" module); depending on the result, the variable can be normalized or not, with either the **linear**, **lowess** or **loess** model.
227 | **- Option 2** (**all loess pool** and **all loess sample**): each variable is normalized by using the 'loess' model;
228 | in the case **all loess pool** is chosen and the number of pool observations is below 5, the linear method is used (for all variables) and a warning is generated;
229 | if the pool intensities are not representative of the samples (which can be viewed on the figure where both trends are shown), the case **all loess sample** enables using the sample intensities (instead of the pool intensities) as the reference for the loess curve.
230 | In all "option 2" cases: the **median intensity of the reference observations** (either 'pool' or 'sample') is used as the scaling factor after the initial intensities have been divided by the loess predictions.
231 |
232
233 Span
234 | Smoothing parameter, advanced option for *lo(w)ess* and *all loess* methods
235 | In case of a loess fit, the **span** parameter (between 0 and 1) controls the smoothing
236 | (the higher the smoother; higher values are preferred to avoid overfitting; Cleveland et al, 1997).
237 |
238
239 Null values
240 | available for regression model *linear*, *lowess* and *loess*
241 | Controls what is done regarding negative or infinite values that can be generated during regression estimation.
242 | *consider it as a missing value* will switch concerned intensities to NA;
243 | this option implies that concerned ions will not be considered in PCA display.
244 | *consider it as a null intensity* will switch concerned intensities to 0 for lo(w)ess
245 | or correct them by the batch mean instead of regression estimate for linear.
246 |
247
248 Factor of interest
249 | available for regression model *linear*, *lowess* and *loess*
250 | Name of the factor (column header) in Sample metadata file that will be used as a categorical variable for plots and PCA.
251 | (often a biological factor ; if none, leave "batch")
252 | This factor does not affect correction calculation.
253 |
254
255 Level of details for plots
256 | available for regression model *linear*, *lowess* and *loess*
257 | *basic*: PCA + CV boxplot (before and after correction)
258 | *standard*: 'basic' plots + before/after-correction plots of intensities over injection order, and design effects for each ion
259 | *complete*: 'standard' plots + QC-pool regression plots per batch with samples' intensities over injection order
260 | This parameter is not used by the *all loess* methods, where a single figure is generated showing the sum of intensities along injection order, and the first 4 PCA scores.
261 |
262
263
264 ------------
265 Output files
266 ------------
267
268 Batch_correction_$method_rdata.rdata
269 | binary data
270 | Download, open R and use the 'load' function; objects are in the 'res' list
271 |
272
273 Batch_correction_$method_graph.pdf
274 | graphical output
275 | For the *linear* and *lo(w)ess* methods, content depends on level of details chosen
276 |
277
278 Batch_correction_$method_variableMetadata.tabular
279 | tsv output
280 | Identical to the Variable metadata input file, with x more columns (where x is the number of batches) in case of *linear*, *lowess* and *loess* methods
281 |
282
283 Batch_correction_$method_dataMatrix.tabular
284 | tsv output (tabulated)
285 | Same formatting as Data Matrix file; contains corrected intensities
286 |
287
288
289 ---------------------------------------------------
290
291 ---------------
292 Working example
293 ---------------
294
295 .. class:: infomark
296
297 Refer to the corresponding "W4M HowTo" page:
298 | `MS data processing - Filters and normalisation &lt;http://workflow4metabolomics.org/sites/workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf&gt;`_
299 |
300 |
301
302 See also the reference history:
303 | `W4M00001_Sacurine-statistics (DOI:10.15454/1.4811121736910142E12) &lt;http://dx.doi.org/10.15454/1.4811121736910142E12&gt;`_
304 |
305
306 ---------------------------------------------------
307
308 ----
309 NEWS
310 ----
311
312 CHANGES IN VERSION 2.1.2
313 ========================
314
315 INTERNAL MODIFICATIONS
316
317 Minor modifications in config file
318
319 CHANGES IN VERSION 2.1.0
320 ========================
321
322 INTERNAL MODIFICATIONS
323
324 For PCA figure display only (**all_loess** options): missing values are set to the minimum value before PCA computation is performed (with svd)
325
326 Additional running and installation tests added with planemo, conda, and travis
327
328 BUG FIX
329
330 Variables with NA or 0 values in all reference samples are discarded before applying the **all_loess** normalization
331
332 INTERNAL MODIFICATIONS
333
334 Modifications of the **all_loess_wrapper** file to handle the recent **ropls** package versions (i.e. 1.3.15 and above) which use S4 classes
335
336 </help>
337 <!-- [RECOMMENDED] All citations associated to this tool (main citation given above and other references). Can be extracted from the history panel -->
338 <citations>
339 <!-- [HELP] As DOI or BibTex entry. NOTE(review): the BibTeX entry below gives the year 1991 while the help text cites "Cleveland et al (1997)" for the same chapter; confirm which year is correct. -->
340 <citation type="doi">10.1021/pr900499r</citation>
341 <citation type="doi">10.1038/nprot.2011.335</citation>
342 <citation type="bibtex">@ARTICLE{Cleveland91,
343 author = {Cleveland et al},
344 year = {1991},
345 journal = {Statistical Models in S, Chambers JM. and Hastie TJ. Ed., Chapman et Hall: London},
346 title = {Local Regression Models},
347 pages = {309-376},
348 editor = {Chambers JM. and Hastie TJ. Ed.},
349 publisher = {Chapman et Hall: London},
350 chapter = {8}
351 }</citation>
352 <citation type="doi">10.1021/acs.jproteome.5b00354</citation>
353 </citations>
354
355
356 </tool>