comparison multivariate_config.xml @ 0:fafba524dca6 draft

planemo upload for repository https://github.com/workflow4metabolomics/multivariate.git commit 6596dbd39d20ee1962d9ebdd87eec04821239760
author ethevenot
date Wed, 27 Jul 2016 11:22:56 -0400
parents
children da272496b32d
comparison
equal deleted inserted replaced
-1:000000000000 0:fafba524dca6
1 <tool id="Multivariate" name="Multivariate" version="2.3.2">
2 <description>PCA, PLS and OPLS</description>
3
4 <requirements> <!-- Package requirements declared for this tool -->
5 <requirement type="package" version="3.2.2">R</requirement>
6 <requirement type="package">r-batch</requirement> <!-- NOTE(review): no version is pinned here, unlike the two other requirements; confirm whether that is intended -->
7 <requirement type="package" version="1.4.2">bioconductor-ropls</requirement>
8 </requirements>
9 <!-- Command template: calls multivariate_wrapper.R from the tool directory with "name value" argument pairs. The advanced graphical and computational arguments are only passed when the matching selector (opgC / opcC) is set to "full". The script path is quoted, like every other argument, so that a tool directory containing spaces does not split the command. -->
10 <command><![CDATA[
11 "$__tool_directory__/multivariate_wrapper.R"
12
13 dataMatrix_in "$dataMatrix_in"
14 sampleMetadata_in "$sampleMetadata_in"
15 variableMetadata_in "$variableMetadata_in"
16
17 respC "$respC"
18 predI "$predI"
19 orthoI "$orthoI"
20 testL "$testL"
21
22 #if $advGph.opgC == "full"
23 typeC "$advGph.typeC"
24 parAsColC "$advGph.parAsColC"
25 parCexN "$advGph.parCexN"
26 parPc1I "$advGph.parPc1I"
27 parPc2I "$advGph.parPc2I"
28 parMahalC "$advGph.parMahalC"
29 parLabVc "$advGph.parLabVc"
30 #end if
31
32 #if $advCpt.opcC == "full"
33 algoC "$advCpt.algoC"
34 crossvalI "$advCpt.crossvalI"
35 log10L "$advCpt.log10L"
36 permI "$advCpt.permI"
37 scaleC "$advCpt.scaleC"
38 #end if
39
40 sampleMetadata_out "$sampleMetadata_out"
41 variableMetadata_out "$variableMetadata_out"
42 figure "$figure"
43 information "$information"
44 ]]></command>
45
46 <inputs>
47 <param name="dataMatrix_in" type="data" format="tabular" label="Data matrix file" help="variable x sample, decimal: '.', missing: NA, mode: numerical, sep: tabular"/> <!-- three tabular inputs followed by the free-text response column name -->
48 <param name="sampleMetadata_in" type="data" format="tabular" label="Sample metadata file" help="sample x metadata, decimal: '.', missing: NA, mode: character and numerical, sep: tabular"/>
49 <param name="variableMetadata_in" type="data" format="tabular" label="Variable metadata file" help="variable x metadata, decimal: '.', missing: NA, mode: character and numerical, sep: tabular"/>
50 <param name="respC" type="text" value="none" label="Y Response (for (O)PLS(-DA) only)" help="Notes: 1) PCA: keep the default (none); 2) (O)PLS(-DA): indicate the name of the column of the sample table to be modeled"/>
51
52 <param name="predI" type="select" label="Number of predictive components" help="Notes: 1) PCA and PLS(-DA): NA can be selected to get a suggestion of the optimal number of predictive components; 2) OPLS(-DA) modeling: select 1 predictive component"> <!-- NA is the preselected choice -->
53 <option selected="true" value="NA">NA</option>
54 <option value="1">1</option>
55 <option value="2">2</option>
56 <option value="3">3</option>
57 <option value="4">4</option>
58 <option value="5">5</option>
59 <option value="6">6</option>
60 <option value="7">7</option>
61 <option value="8">8</option>
62 <option value="9">9</option>
63 <option value="10">10</option>
64 </param>
65 <param name="orthoI" label="Number of orthogonal components (for OPLS(-DA) only)" type="select" help="Notes: 1) PCA and PLS(-DA): keep the default value (0); 2) OPLS(-DA): NA can be selected to get a suggestion of the optimal number of orthogonal components"> <!-- default (0) is marked explicitly, as for the other selects of this tool -->
66 <option value="0" selected="true">0</option>
67 <option value="NA">NA</option>
68 <option value="1">1</option>
69 <option value="2">2</option>
70 <option value="3">3</option>
71 <option value="4">4</option>
72 <option value="5">5</option>
73 <option value="6">6</option>
74 <option value="7">7</option>
75 <option value="8">8</option>
76 <option value="9">9</option>
77 <option value="10">10</option>
78 </param>
79
80 <param name="testL" type="select" label="Samples to be tested" help="In case predictions should be computed on test samples, provide in your sampleMetadata a column named test. (use exactly this column name, with the dot at the end) and containing yes and no values to indicate which samples should be tested; for those samples, the values of the response will not be used (you can leave NA in the response column of the sample metadata)"> <!-- yes/no choice sent to the wrapper as TRUE/FALSE; "no" is preselected -->
81 <option value="TRUE">yes</option>
82 <option selected="true" value="FALSE">no</option>
83 </param>
84
85 <conditional name="advGph"> <!-- Graphical options; the parameters of the "full" branch are the ones the command template forwards when opgC == "full" -->
86 <param name="opgC" type="select" label="Advanced graphical parameters" >
87 <option value="default" selected="true">Use default</option>
88 <option value="full">Full parameter list</option>
89 </param>
90
91 <when value="default"/>
92 <when value="full">
93 <param name="typeC" label="Graphic type" type="select" help="">
94 <option value="correlation">correlation</option>
95 <option value="outlier">outlier</option>
96 <option value="overview">overview</option>
97 <option value="permutation">permutation</option>
98 <option value="predict-train">predict-train</option>
99 <option value="summary" selected="true">summary</option>
100 <option value="x-loading">x-loading</option>
101 <option value="x-score">x-score</option>
102 <option value="x-variance">x-variance</option>
103 <option value="xy-score">xy-score</option>
104 <option value="xy-weight">xy-weight</option>
105 </param>
106 <param name="parMahalC" label="Ellipses" type="text" value="NA" help="Name of the sample metadata column with the classes to be used for drawing ellipses; for (O)PLS-DA, the default 'NA' means that the same name as the 'Response' argument above will be used; if you do not want ellipses, use none instead of NA" />
107 <param name="parAsColC" label="Sample colors" type="text" value="none" help="Indicate the name of the sample metadata column with the names to be converted into colors; by default (none), data matrix sample names will be used" />
108 <param name="parLabVc" label="Sample labels" type="text" value="none" help="Indicate the name of the sample metadata column with the names to be used as labels; By default (none), sample names from the data matrix will be used" />
109 <param name="parPc1I" label="Component to be displayed as abscissa" type="select" help="In case of OPLS(-DA), the first component (i.e. the predictive component) must be set to 1"> <!-- default is component 1, marked on the option itself -->
110 <option value="1" selected="true">1</option>
111 <option value="2">2</option>
112 <option value="3">3</option>
113 <option value="4">4</option>
114 <option value="5">5</option>
115 <option value="6">6</option>
116 <option value="7">7</option>
117 <option value="8">8</option>
118 <option value="9">9</option>
119 <option value="10">10</option>
120 </param>
121 <param name="parPc2I" label="Component to be displayed as ordinate" type="select" help="In case of OPLS(-DA), the orthogonal component of the selected value - 1 will be displayed (e.g. to see the first orthogonal component, select the value '2' below)"> <!-- default is component 2; 1 is not offered here -->
122 <option value="2" selected="true">2</option>
123 <option value="3">3</option>
124 <option value="4">4</option>
125 <option value="5">5</option>
126 <option value="6">6</option>
127 <option value="7">7</option>
128 <option value="8">8</option>
129 <option value="9">9</option>
130 <option value="10">10</option>
131 </param>
132 <param name="parCexN" type="float" value="0.8" label="Amount by which plotting text should be magnified relative to the default"/>
133
134 </when>
135 </conditional>
136
137 <conditional name="advCpt"> <!-- Computational options; the parameters of the "full" branch are the ones the command template forwards when opcC == "full" -->
138 <param name="opcC" type="select" label="Advanced computational parameters" >
139 <option value="default" selected="true">Use default</option>
140 <option value="full">Full parameter list</option>
141 </param>
142
143 <when value="default"/>
144 <when value="full">
145 <param name="scaleC" label="Scaling" type="select" help="Select 'standard' for mean-centering and unit-variance scaling">
146 <option value="standard" selected="true">standard</option>
147 <option value="center">center</option>
148 <option value="pareto">pareto</option>
149 </param>
150 <param name="permI" label="Permutation testing for (O)PLS(-DA): Number of permutations" type="select" help="Default is 20 for single response models without train/test partition, and 0 otherwise">
151 <option value="0">0</option>
152 <option value="20" selected="true">20</option>
153 <option value="100">100</option>
154 <option value="1000">1000</option>
155 </param>
156 <param name="log10L" label="Log10 transformation" type="select" help="">
157 <option value="TRUE">yes</option>
158 <option value="FALSE" selected="true">no</option>
159 </param>
160 <param name="algoC" label="Algorithm" type="select" help="Default algorithm is 'svd' for PCA and 'nipals' for PLS and OPLS; when performing PCA with 'svd' on a data matrix containing missing values, NAs are set to half the minimum of non-missing values and a warning is generated; an alternative is to use the 'nipals' algorithm (able to handle a moderate amount of missing values)">
161 <option value="default" selected="true">default</option>
162 <option value="nipals">nipals</option>
163 <option value="svd">svd</option>
164 </param>
165 <param name="crossvalI" label="Number of cross-validation segments" type="select" help="Must be less than or equal to the number of samples">
166 <option value="1">1</option>
167 <option value="2">2</option>
168 <option value="3">3</option>
169 <option value="4">4</option>
170 <option value="5">5</option>
171 <option value="6">6</option>
172 <option value="7" selected="true">7</option>
173 <option value="8">8</option>
174 <option value="9">9</option>
175 <option value="10">10</option>
176 </param>
177
178 </when>
179 </conditional>
180
181 </inputs>
182
183 <outputs> <!-- tabular sample and variable metadata outputs, a PDF figure and a text information file -->
184 <data name="sampleMetadata_out" format="tabular" label="${tool.name}_${sampleMetadata_in.name}"/>
185 <data name="variableMetadata_out" format="tabular" label="${tool.name}_${variableMetadata_in.name}"/>
186 <data name="figure" format="pdf" label="${tool.name}__figure.pdf"/>
187 <data name="information" format="txt" label="${tool.name}__information.txt"/>
188 </outputs>
189
190 <tests>
191 <test> <!-- Single test: response column "age" with 1 predictive and 1 orthogonal component, no test samples; only the column counts of the two metadata outputs are asserted -->
192 <param name="dataMatrix_in" value="input-dataMatrix.tsv"/>
193 <param name="sampleMetadata_in" value="input-sampleMetadata.tsv"/>
194 <param name="variableMetadata_in" value="input-variableMetadata.tsv"/>
195 <param name="respC" value="age"/>
196 <param name="predI" value="1"/>
197 <param name="orthoI" value="1"/>
198 <param name="testL" value="FALSE"/>
199 <output name="sampleMetadata_out">
200 <assert_contents>
201 <has_n_columns n="9"/>
202 </assert_contents>
203 </output>
204 <output name="variableMetadata_out">
205 <assert_contents>
206 <has_n_columns n="7"/>
207 </assert_contents>
208 </output>
209 </test>
210 </tests>
211
212 <help>
213
214 .. class:: infomark
215
216 **Author** Etienne Thevenot (CEA, LIST, MetaboHUB Paris, etienne.thevenot@cea.fr)
217
218 ---------------------------------------------------
219
220 .. class:: infomark
221
222 **Please cite**
223
224 Etienne A. Thevenot, Aurelie Roux, Ying Xu, Eric Ezan, and Christophe Junot (2015). Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses. *Journal of Proteome Research*, **14**:3322-3335 (http://dx.doi.org/10.1021/acs.jproteome.5b00354).
225
226 ---------------------------------------------------
227
228 .. class:: infomark
229
230 **R package**
231
232 The *ropls* package is available from the bioconductor repository (http://bioconductor.org/packages/ropls).
233
234 ---------------------------------------------------
235
236 .. class:: infomark
237
238 **Tool updates**
239
240 See the **NEWS** section at the bottom of this page
241
242 ---------------------------------------------------
243
244 ==============================================
245 Multivariate analysis with PCA and (O)PLS(-DA)
246 ==============================================
247
248 -----------
249 Description
250 -----------
251
252 **Latent variable modeling** with Principal Component Analysis (**PCA**) and Partial Least Squares (**PLS**) are powerful methods for **visualization**, **regression**, **classification**, and feature selection of **omics data** where the number of variables exceeds the number of samples and with multicollinearity among variables (Wold et al, 2001; Tenenhaus, 1998; Wehrens, 2011; Eriksson et al, 2006; Trygg et al, 2007). Orthogonal Partial Least Squares (**OPLS**) makes it possible to separately model the variation correlated (predictive) to the factor of interest and the uncorrelated (orthogonal) variation (Trygg and Wold, 2002). While performing similarly to PLS, OPLS facilitates interpretation. Successful applications of these chemometrics techniques include spectroscopic data such as Raman spectroscopy, nuclear magnetic resonance (NMR), mass spectrometry (MS) in metabolomics and proteomics, but also transcriptomics data. In addition to **scores**, **loadings** and **weights** plots, the module provides metrics and graphics to determine the optimal number of components (e.g. with the **R2** and **Q2** coefficients; Wold et al, 2001; Tenenhaus, 1998; Eriksson et al, 2006), check the **validity of the model** by permutation testing (Szymanska et al, 2012), detect **outliers** (Wold et al, 2001; Tenenhaus, 1998; Hubert et al, 2005), and provide several metrics to assess the importance of the variables in the model (e.g. **Variable Importance in Projection** or regression coefficients; Wold et al, 2001; Mehmood et al, 2012; Galindo-Prieto et al, 2014). The module is an implementation of the **ropls** R package available from Bioconductor (Thevenot et al, 2015).
253
254 --------
255 Comments
256 --------
257
258 1) Overfitting
259 | Overfitting (i.e., building a model with good performances on the training set but poor performances on a new test set) is a major caveat of machine learning techniques applied to data sets with more variables than samples. A simple simulation with a random dataMatrix and a random response shows that perfect PLS-DA classification can be achieved as soon as the number of variables exceeds the number of samples (Wehrens, 2011). It is therefore essential to check that the Q2 value of the model is significant by random permutation of the labels: the number of permutations (advanced computational parameter) is set to 20 by default but should be increased for confirmation of the results.
260
261 2) VIP from OPLS models
262 | The classical VIP metric is not useful for OPLS modeling of a single response (Galindo-Prieto et al, 2014; Thevenot et al, 2015). In fact, when features are standardized, we can demonstrate a mathematical relationship between VIP and *p*-values from a Pearson correlation test (Thevenot et al, 2015): classical VIP are therefore univariate for OPLS(-DA) models (and identical whatever the number of orthogonal components of the model).
263 | Galindo-Prieto et al. (2014) have therefore recently suggested new VIP metrics for OPLS, VIP*pred* and VIP*ortho*, to separately measure the influence of the features in the modeling of the dispersion correlated to, and orthogonal to the response, respectively.
264 | For OPLS(-DA) models, the output variableMetadata contains the 2 metrics: VIP_pred is a measure of the variable importance in prediction and VIP_ortho is a measure of the variable importance in orthogonal modeling. VIP_pred and VIP_ortho are scaled as the classical VIP (i.e., the mean of their squared values equals 1).
265
266 3) (Orthogonal) Partial Least Squares Discriminant Analysis: (O)PLS-DA
267 | The approach for discriminant analysis implemented in the module relies on internal conversion of the response into a dummy vector (resp. a matrix when the number of classes is > 2), mean-centering and unit-variance scaling of the vector (resp. the matrix), and PLS (resp. PLS2) regression modeling.
268 | When the sizes of the 2 classes are unbalanced, Brereton and Lloyd (2014) have demonstrated that a bias is introduced in the computation of the decision rule, which penalizes the class with the highest size. In the multiclass case, the proportions of 0 and 1 in the columns are usually unbalanced (even in the case of balanced size of the classes) resulting in a bias (Brereton and Lloyd, 2014).
269 | With the current implementation of the module, we thus recommend to stick to binary discrimination and use balanced classes for optimal use.
270
271 ----------
272 References
273 ----------
274
275 | Brereton R.G. and Lloyd G.R. (2014). Partial least squares discriminant analysis: taking the magic away. *Journal of Chemometrics*, 28:213-225. http://dx.doi.org/10.1002/cem.2609
276 | Eriksson L., Johansson E., Kettaneh-Wold N. and Wold S. (2001). Multi- and megavariate data analysis. Principles and applications. *Umetrics Academy*.
277 | Galindo-Prieto B., Eriksson L. and Trygg J. (2014). Variable influence on projection (VIP) for orthogonal projections to latent structures (OPLS). *Journal of Chemometrics*, 28:623-632. http://dx.doi.org/10.1002/cem.2627
278 | Hubert M., Rousseeuw P. and Vanden Branden K. (2005). ROBPCA: a new approach to robust principal component analysis. *Technometrics*, 47:64-79. http://dx.doi.org/10.1198/004017004000000563
279 | Mehmood T., Liland K.H., Snipen L. and Saebo S. (2012). A review of variable selection methods in Partial Least Squares Regression. *Chemometrics and Intelligent Laboratory Systems*, 118:62-69. http://dx.doi.org/10.1016/j.chemolab.2012.07.010
280 | Szymanska E., Saccenti E., Smilde A. and Westerhuis J. (2012). Double-check: validation of diagnostic statistics for PLS-DA models in metabolomics studies. *Metabolomics*, 8:3-16. http://dx.doi.org/10.1007/s11306-011-0330-3
281 | Tenenhaus M. (1998). La regression PLS : theorie et pratique. *Technip*.
282 | Thevenot E.A., Roux A., Xu Y., Ezan E. and Junot C. (2015). Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses. *Journal of Proteome Research*, 14:3322-3335. http://dx.doi.org/10.1021/acs.jproteome.5b00354
283 | Trygg J. and Wold S. (2002). Orthogonal projection to latent structures (O-PLS). *Journal of Chemometrics*, 16:119-128. http://dx.doi.org/10.1002/cem.695
284 | Trygg J., Holmes E. and Lundstedt T. (2007). Chemometrics in Metabonomics. *Journal of Proteome Research*, 6:469-479. http://dx.doi.org/10.1021/pr060594q
285 | Wehrens R. (2011). Chemometrics with R. *Springer*.
286 | Wold S., Sjostrom M. and Eriksson L. (2001). PLS-regression: a basic tool of chemometrics. *Chemometrics and Intelligent Laboratory Systems*, 58:109-130. http://dx.doi.org/10.1016/S0169-7439(01)00155-1
287
288 -----------------
289 Workflow position
290 -----------------
291
292 .. image:: multivariate_workflowPositionImage.png
293 :width: 600
294
295 -----------
296 Input files
297 -----------
298
299 +---------------------------+------------+
300 | File | Format |
301 +===========================+============+
302 | 1) Data matrix | tabular |
303 +---------------------------+------------+
304 | 2) Sample metadata | tabular |
305 +---------------------------+------------+
306 | 3) Variable metadata | tabular |
307 +---------------------------+------------+
308
309
310 ----------
311 Parameters
312 ----------
313
314 Data matrix file
315 | variable x sample **dataMatrix** tabular separated file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the sample and variable metadata, respectively (see below)
316 |
317
318 Sample metadata file
319 | sample x metadata **sampleMetadata** tabular separated file of the numeric and/or character sample metadata, with . as decimal and NA for missing values
320 |
321
322 Variable metadata file
323 | variable x metadata **variableMetadata** tabular separated file of the numeric and/or character variable metadata, with . as decimal and NA for missing values
324 |
325
326 Y Response (mandatory for PLS and OPLS; keep the default, none, for PCA)
327 | Column of the sample metadata table to be used as (qualitative or quantitative) response for (O)PLS(-DA)
328 |
329
330 Number of (predictive) components (default = NA)
331 | For OPLS(-DA), this number is automatically converted to 1; otherwise if set to **NA**, the optimal number of components is automatically determined by cross-validation: components are extracted until (i) PCA case: the variance is less than the mean variance of all components (note that this rule requires all components to be computed and can be quite time-consuming for large datasets) or (ii) PLS case: either R2Y of the component is less than 0.01 or Q2Y is less than 0 (when the dataset contains more than 100 samples) or 0.05 otherwise
332 |
333
334 Number of orthogonal components (mandatory for OPLS(-DA); default = 0 otherwise)
335 | When set to **0** [default], PLS will be performed; otherwise OPLS will be performed; when set to **NA**, OPLS is performed and the number of orthogonal components is automatically computed by using cross-validation
336 |
337
338 Samples for prediction (for (O)PLS(-DA) only; default is no)
339 | In case predictions should be computed on test samples, provide in your **sampleMetadata** a column named **test.** (use exactly this column name, with the dot at the end) and containing **yes** and **no** values to indicate which samples should be tested; for those samples, the values of the response will not be used (you can leave **NA** in the response column of the **sampleMetadata**)
340 |
341
342 Advanced graphical parameters
343 |
344
345 Graphic type (default = summary)
346 | **summary** 4-plot graphics showing **overview** (or **permutation** when the number of permutations is superior to 0; see below), **outlier**, **x-loading** and **x-score**
347 | **correlation** Variable correlations with the components
348 | **outlier** Observation diagnostics (score and orthogonal distances)
349 | **overview** Model overview showing R2Ycum and Q2cum (or 'Variance explained' for PCA)
350 | **permutation** Scatterplot of R2Y and Q2Y actual and simulated models after random permutation of response values
351 | **predict-train** Predicted vs Actual Y for the reference set (only if Y has a single column)
352 | **x-loading** X-Loadings
353 | **x-score** X-Scores
354 | **x-variance** Spread of raw variables corresp. to quantile variances and, if the number of variables is less than 100, correlations between the X-variables
355 | **xy-score** XY-Scores
356 | **xy-weight** XY-Weights
357 | .pdf image files can be converted to high-resolution .tif images (e.g. for publication) by using the open-source Gimp software: open the .pdf with resolution = 300, and export as a .tif image without compression
358 |
359
360 Ellipses (default = NA)
361 | If 'NA' ellipses are drawn automatically for (O)PLS-DA, or for PCA, when a column of characters is selected in the 'sample colors' argument below. If you do not want ellipses, set to none.
362 |
363
364 Sample colors (default = none)
365 | Name of the column of the sample table with the classes to be used for coloring the samples on plots (e.g. for PCA or if you wish to highlight a factor distinct from the response above); by default (none) sample names are converted into a color palette
366 |
367
368 Sample labels (default = none)
369 | Name of the column of the sample table with the classes to be used for labeling the samples on plots; by default (none), sample names will be used
370 |
371
372 Component to be displayed as abscissa (default = 1)
373 | In case of OPLS(-DA), the first component (i.e. the predictive component) must be set to 1
374 |
375
376 Component to be displayed as ordinate (default = 2)
377 | Note: In case of OPLS(-DA), the orthogonal component of the value below - 1 will be displayed (e.g. to see the first orthogonal component, select the value **2** (default))
378 |
379
380 Number of variables most contributing to loadings to be highlighted (default = 3)
381 | Such variables will be colored in red on the loading plot; In addition, the loading values and the correlation with the components will be printed in the text summary
382 |
383
384 Advanced computational parameters
385 |
386
387 Scaling (default = standard)
388 | Either mean-centering alone (**center**), or followed by pareto scaling (**pareto**), or unit-variance scaling (**standard**)
389 |
390
391 Permutation testing for (O)PLS(-DA) models: Number of permutations (default = 20)
392 | Number of random permutations of response labels to estimate R2Y and Q2Y significance; Default is 20 for single response models and 0 otherwise
393 |
394
395 Log10 transformation (default = no)
396 | Should the data matrix values be log10 transformed? Note: zeros are set to 1 prior to transformation
397 |
398
399 Train/test partition (default = none)
400 | When set to **odd**, samples with odd indices are used to train the model, which is subsequently tested on the samples with even indices; a RMSEP (root mean square error estimation of prediction) is computed, in addition to the RMSEE (error of estimation). Note that in case of a qualitative response, the proportion of samples in each class in the full dataset is preserved within the reference and train subsets
401 |
402
403 Algorithm (default = svd for PCA and nipals for PLS and OPLS)
404 | When using **svd** (singular value decomposition) for PCA on a **dataMatrix** containing missing values (NA), the latter are set to half the minimum of non-missing values and a warning is generated; an alternative is to use the **nipals** algorithm (non-linear iterative partial least squares, based on a power method to find eigenvalues, and able to handle a small amount of missing values); For PLS and OPLS, only the **nipals** algorithm is available
405 |
406
407 Number of cross-validation segments (default = 7)
408 |
409
410
411 ------------
412 Output files
413 ------------
414
415
416 sampleMetadata_out.tabular
417 | **sampleMetadata** file identical to the file given as argument, except that two columns with the x-scores of the displayed components have been added
418 |
419
420 variableMetadata_out.tabular
421 | **variableMetadata** file identical to the file given as argument, except that i) 3 columns with the x-loadings of the displayed components, and the regression coefficients, have been added, ii) in the case of PLS, a column with the VIP values (variable importance in projection of the model with all components) has been added, iii) in the case of OPLS, 2 columns with the VIP_pred and VIP_ortho have been added.
422 |
423
424 figure.pdf
425 | Graphic
426 |
427
428 information.txt
429 | Text file with all messages and warnings generated during the computation
430 |
431
432 ---------------------------------------------------
433
434 ----------------
435 Working examples
436 ----------------
437
438 |
439
440 .. class:: infomark
441
442 See the **W4M00001a_sacurine-subset-statistics**, **W4M00001b_sacurine-complete**, **W4M00002_mtbls2** or **W4M00003_diaplasma** shared histories in the **Shared Data/Published Histories** menu
443
444
445 Figure output
446 =============
447
448 .. image:: multivariate_workingExampleImage.png
449 :width: 600
450
451 ---------------------------------------------------
452
453 ----
454 NEWS
455 ----
456
457 CHANGES IN VERSION 2.3.2
458 ========================
459
460 NEW FEATURES
461
462 Error messages are generated in OPLS(-DA) models in case of non-significance of either the predictive or the first orthogonal component
463
464 INTERNAL MODIFICATIONS
465
466 Modifications of the **wrapper** file to handle the recent **ropls** package versions (i.e. 1.3.15 and above) which use S4 classes
467
468 CHANGES IN VERSION 2.3.0
469 ========================
470
471 NEW FEATURES
472
473 1) **Predictions** now available (see the 'Samples to be tested' argument)
474 2) OPLS(-DA): **Predictive and Orthogonal VIP** are now computed (see the 'comments' section)
475 3) **Multiclass PLS-DA** implemented (see the 'comments' section)
476
477 MINOR MODIFICATIONS
478
479 1) Changes in color palette: black/grey colors for diagnostics and other colors for scores
480 2) Default number of permutations set to 20 (instead of 10)
481 3) Predictive components denoted in the tables by 'p' (instead of 'h' previously)
482
483 CHANGES IN VERSION 2.2.4
484 ========================
485
486 1) Correction in the Galaxy wrapper (in the previous version, the number of predictive components was sometimes set to the maximum by mistake)
487 2) The regression coefficients are now provided as a new column of the variableMetadata output
488
489 CHANGES IN VERSION 2.2.3
490 ========================
491
492 The default number of permutations is set to 10 (instead of 100) as a compromise to enable both a quick computation and a first hint at model significance
493
494 CHANGES IN VERSION 2.2.2
495 ========================
496
497 1) A default of 100 permutations has been set in order to check for overfitting; in addition, 'permutation', 'overview', and 'outlier' plots are now displayed by default
498 2) Classification is currently implemented for two-class responses only
499 3) *dataMatrix* is not modified by the tool, so it does not appear as an output file
500 4) Double cross-validation (advanced computational parameters): 'odd' now refers to train (instead of test) indices
501
502 </help>
503
504 <citations> <citation type="doi">10.1021/acs.jproteome.5b00354</citation> </citations> <!-- DOI of the article listed under "Please cite" in the help -->
505 </tool>