comparison w4mcorcov.xml @ 1:0c2ad44b6c9c draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 01d4a951cf09e7b88fcec96b8043bc7568cc5c92
author eschen42
date Sun, 22 Oct 2017 18:47:57 -0400
parents 23f9fad4edfc
children e03582f26617
comparison
equal deleted inserted replaced
0:23f9fad4edfc 1:0c2ad44b6c9c
1 <tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.2"> 1 <tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.3">
2 2
3 <description>OPLS-DA Contrasts of Univariate Results</description> 3 <description>OPLS-DA Contrasts of Univariate Results</description>
4 4
5 <requirements> 5 <requirements>
6 <requirement type="package">r-batch</requirement> 6 <requirement type="package">r-batch</requirement>
29 ]]></command> 29 ]]></command>
30 30
31 <inputs> 31 <inputs>
32 <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="Features x samples (tabular data - decimal: '.'; missing: NA; mode: numerical; separator: tab character)" /> 32 <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="Features x samples (tabular data - decimal: '.'; missing: NA; mode: numerical; separator: tab character)" />
33 <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="Samples x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" /> 33 <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="Samples x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" />
34 <param name="variableMetadata_in" label="Variable metadata file (from Univariate)" type="data" format="tabular" help="Features x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" /> 34 <param name="variableMetadata_in" label="Variable metadata file (ideally from Univariate)" type="data" format="tabular" help="Features x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" />
35 <param name="facC" label="Factor of interest" type="text" help="The name of the column of sampleMetadata corresponding to the qualitative variable used to define the contrasts. This also must be a portion of the column names in the variableMetadata file except when the 'Univariate Significance-test' is set to 'none'."/> 35 <param name="facC" label="Factor of interest" type="text" help="REQUIRED - The name of the column of sampleMetadata corresponding to the qualitative variable used to define the contrasts. Except when the 'Univariate Significance-test' is set to 'none', this also must be a portion of the column names in the variableMetadata file."/>
36 <param name="tesC" label="Univariate Significance-Test" type="select" help="Either 'none' or the name of the statistical test that was run by the 'Univariate' tool to produce the variableMetadata file; that name must also be a portion of the column names in that file"> 36 <param name="tesC" label="Univariate Significance-Test" type="select" help="Either 'none' or the name of the statistical test that was run by the 'Univariate' tool to produce the variableMetadata file; that name must also be a portion of the column names in that file.">
37 <option value="none">none - Display all features from variableMetadata (rather than choosing a subset based on significance in univariate testing)</option> 37 <option value="none">none - Display all features from variableMetadata (rather than choosing a subset based on significance in univariate testing)</option>
38 <option value="ttest">ttest - Student's t-test (parametric test, qualitative factor with exactly 2 levels)</option> 38 <option value="ttest">ttest - Student's t-test (parametric test, qualitative factor with exactly 2 levels)</option>
39 <option value="anova">anova - Analysis of variance (parametric test, qualitative factor with more than 2 levels)</option> 39 <option value="anova">anova - Analysis of variance (parametric test, qualitative factor with more than 2 levels)</option>
40 <option value="wilcoxon">wilcoxon - Wilcoxon rank test (nonparametric test, qualitative factor with exactly 2 levels)</option> 40 <option value="wilcoxon">wilcoxon - Wilcoxon rank test (nonparametric test, qualitative factor with exactly 2 levels)</option>
41 <option value="kruskal">kruskal - Kruskal-Wallis rank test (nonparametric test, qualitative factor with more than 2 levels)</option> 41 <option value="kruskal">kruskal - Kruskal-Wallis rank test (nonparametric test, qualitative factor with more than 2 levels)</option>
45 type="boolean" 45 type="boolean"
46 checked="true" 46 checked="true"
47 truevalue="TRUE" 47 truevalue="TRUE"
48 falsevalue="FALSE" 48 falsevalue="FALSE"
49 label="Retain only pairwise-significant features" 49 label="Retain only pairwise-significant features"
50 help="Ignored when 'none' is chosen. Otherwise, when 'Yes', analyze only features that differ significantly for the pair of levels being contrasted; when 'No', include any feature that varies significantly across all levels."/> 50 help="When 'none' is chosen, all features are included in the analysis. Otherwise, when this option is set to 'Yes', analysis will be performed including only features that differ significantly for the pair of levels being contrasted; when set to 'No', any feature that varies significantly across all levels will be included (i.e., exclude any feature that is not significantly different across all levels). See examples below."/>
51 <param name="levCSV" label="Levels of interest" type="text" value = "" help="Comma-separated level-names (or comma-less regular expressions to match level-names) to consider in analysis; must match at least two levels; may include wild cards or regular expressions"> 51 <param name="levCSV" label="Levels of interest" type="text" value = "*" help="Comma-separated level-names (or comma-less regular expressions to match level-names) to consider in analysis; must match at least two levels; levels must be non-numeric; may include wild cards or regular expressions. Note that extra space characters will affect results - 'a,b' is correct, but 'a , b' is not and may fail or give different results.">
52 <sanitizer> 52 <sanitizer>
53 <valid initial="string.letters"> 53 <valid initial="string.letters">
54 <add preset="string.digits"/> 54 <add preset="string.digits"/>
55 <add value="&#36;" /> <!-- $ dollar, dollar-sign --> 55 <add value="&#36;" /> <!-- $ dollar, dollar-sign -->
56 <add value="&#40;" /> <!-- ( left-paren --> 56 <add value="&#40;" /> <!-- ( left-paren -->
72 <add value="&#125;" /> <!-- } r-cube, right-curly-bracket --> 72 <add value="&#125;" /> <!-- } r-cube, right-curly-bracket -->
73 <!-- IMPORTANT - Note that single and double quotes are not part of this list; they have the potential to make the 'command' section insecure or broken. --> 73 <!-- IMPORTANT - Note that single and double quotes are not part of this list; they have the potential to make the 'command' section insecure or broken. -->
74 </valid> 74 </valid>
75 </sanitizer> 75 </sanitizer>
76 </param> 76 </param>
77 <param name="matchingC" label="Level-name matching" type="select" help="How to specify levels generically"> 77 <param name="matchingC" label="Level-name matching" type="select" help="How to specify level-names generically (if at all).">
78 <option value="none" selected="true">do no generic matching (default)</option> 78 <option value="none">do no generic matching</option>
79 <option value="wildcard">use wild-cards for matching level-names</option> 79 <option value="wildcard" selected="true">use wild-cards for matching level-names (default)</option>
80 <option value="regex">use regular expressions for matching level-names</option> 80 <option value="regex">use regular expressions for matching level-names</option>
81 </param> 81 </param>
82 <param name="labelFeatures" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Label features on detail plot" help="When true, add feature labels to points on the covariance-versus-correlation plot."/> 82 <param name="labelFeatures" type="text" value="3" label="Number of features having extreme loadings to label on cov-vs.-cor plot" help="Specify the number of features at each of the four loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features; this choice has no effect on the OPLS-DA loadings plot."/>
83 </inputs> 83 </inputs>
84 84
85 <outputs> 85 <outputs>
86 <!-- 86 <!--
87 pdf1: summaries of each contrast, clearly labeled by level-pair name 87 pdf1: summaries of each contrast, clearly labeled by level-pair name
312 312
313 313
314 Motivation 314 Motivation
315 ---------- 315 ----------
316 316
317 OPLS-DA\ :superscript:`®` and the SIMCA\ :superscript:`®` S-PLOT\ :superscript:`®` (Wiklund *et al.*, 2008) may be employed (e.g. Sun *et al.*, 2016) to identify features that are potential biomarkers, i.e. features that are potentially useful to discriminate to which class a sample should be assigned. Workflow4Metabolomics (W4M, Giacomoni *et al.*, 2014, Guitton *et al.*, 2017) provides a suite of tools for preprocessing and statistical analysis of LC-MS, GC-MS, and NMR metabolomics data; however, it does not (as of release 3.0) include a tool for making the equivalent of an S-PLOT. 317 OPLS-DA\ :superscript:`®` and the SIMCA\ :superscript:`®` S-PLOT\ :superscript:`®` (Wiklund *et al.*, 2008) may be employed to draw attention to metabolomic features that are potential biomarkers, i.e. features that are potentially useful to discriminate to which class a sample should be assigned (e.g. Sun *et al.*, 2016). Workflow4Metabolomics (W4M, Giacomoni *et al.*, 2014, Guitton *et al.*, 2017) provides a suite of tools for preprocessing and statistical analysis of LC-MS, GC-MS, and NMR metabolomics data; however, it does not (as of release 3.0) include a tool for making the equivalent of an S-PLOT.
318 318
319 The S-PLOT is computed from mean-centered, pareto-scaled data. This plot presents the correlation of the first score vector from an OPLS-DA model with the sample-variables used to produce that model versus the covariance of the scores with the sample-variables. For OPLS-DA, the first score vector represents the variation among the sample-variables that is explained by the contrasting factor. 319 The S-PLOT is computed from mean-centered, pareto-scaled data. This plot presents the correlation of the first score vector from an OPLS-DA model with the sample-variables used to produce that model versus the covariance of the scores with the sample-variables. For OPLS-DA, the first score vector represents the variation among the sample-variables that is related to the predictor (i.e., the contrasting factor).
320 320
321 The principal aims of this tool are: 321 The primary aims of this tool are:
322 322
323 - To compute multiple contrasts with OPLS-DA and the covariance vs. correlation plot. 323 - To compute and visualize multiple contrasts with OPLS-DA and the covariance vs. correlation plot.
324 - To visualize multiple contrasts compactly yet informatively.
325 - To write the results to data files for use in further multivariate analysis or visualization. 324 - To write the results to data files for use in further multivariate analysis or visualization.
326 325
326 Note: This tool only supports categorical factors with non-numeric level-names.
327 327
328 Description 328 Description
329 ----------- 329 -----------
330 330
331 The purpose of the 'OPLS-DA Contrasts' tool is to identify and visualize GC-MS or LC-MS features that are possible biomarkers. 331 The purpose of the 'OPLS-DA Contrasts' tool is to visualize GC-MS or LC-MS features that are possible biomarkers.
332 332
333 The W4M 'Univariate' tool (Thévenot *et al.*, 2015) adds the results of family-wise corrected pairwise significance-tests as columns of the **variableMetadata** dataset. 333 The W4M 'Univariate' tool (Thévenot *et al.*, 2015) adds the results of family-wise corrected pairwise significance-tests as columns of the **variableMetadata** dataset.
334 For instance, suppose that you ran Kruskal-Wallis testing for a column named 'cluster' in sampleMetadata that has values 'k1' and 'k2' and at least one other value. 334 For instance, suppose that you ran Kruskal-Wallis testing for a column named 'cluster' in sampleMetadata that has values 'k1' and 'k2' and at least one other value.
335 335
336 - A column of variableMetadata would be labeled 'cluster_kruskal_sig' and would have values '1' and '0', where '1' means that, when the samples are grouped by 'cluster', there is strong evidence against the hypothesis that there is no difference among the intensities for the feature across all sample-groups. 336 - A column of variableMetadata would be labeled 'cluster_kruskal_sig' and would have values '1' and '0'; when the samples are grouped by 'cluster', '1' means that there is strong evidence against the hypothesis that there is no difference among the intensities for the feature across all sample-groups.
337 - A column of variableMetadata would be labeled 'cluster_kruskal_k1.k2_sig' and would have values '1' and '0', where '1' means that there is significant evidence against the hypothesis that samples from sampleMetadata whose 'cluster' column contains 'k1' or 'k2' have the same intensity for that feature. 337 - A column of variableMetadata would be labeled 'cluster_kruskal_k1.k2_sig' and would have values '1' and '0', where '1' means that there is significant evidence against the hypothesis that samples from sampleMetadata whose 'cluster' column contains 'k1' or 'k2' have the same intensity for that feature.
338 338
339 The 'OPLS-DA Contrasts' tool produces graphics and data for OPLS-DA contrasts of feature-intensities between significantly different pairs of factor-levels. For each factor-level, the tool performs a contrast with all other factor-levels combined and then separately with each other factor-level. 339 The 'OPLS-DA Contrasts' tool produces graphics and data for OPLS-DA contrasts of feature-intensities between significantly different pairs of factor-levels. For each factor-level, the tool performs a contrast with all other factor-levels combined and then separately with each other factor-level.
340 340
341 This tool can be used in a purely exploratory manner by supplying the variableMetadata file without the columns added by the W4M 'Univariate' tool. However, the preferred workflow (Thévenot *et al.*, 2015) is to use univariate testing to exclude features that are not significantly different and use OPLS-DA to visualize the differences identified in univariate testing; an appropriate exception would be to visualize contrasts of a specific list of metabolites. 341 **Along the left-to-right axis, the plots show the supervised projection of the variation explained by the predictor** (i.e., the factor specified when invoking the tool); **the top-to-bottom axis displays the variation that is orthogonal to the predictor level** (i.e., independent of it).
342 342
343 It must be stressed that there may be no definitive computational approach to select features that are guaranteed to be reliable biomarkers. Possible methods are examining extreme values on S-PLOTs, examining "variable importance in projection VIP for OPLS-DA" (Galindo-Prieto *et al.* 2014), or examining a feature's "selectivity ratio" (Rajalahti *et al.*, 2009). In this spirit, this tool reports the S-PLOT covariance and correlation (Wiklund *op. cit.*) and VIP metrics, and it introduces an informal "salience" metric to flag features that may merit attention without dimensional reduction; future versions may add selectivity ratio. Regardless of how any potential biomarker is identified, it should be subjected to further validation analysis before it is recommended for general application. 343 Although this tool can be used in a purely exploratory manner by supplying the variableMetadata file without the columns added by the W4M 'Univariate' tool, **the preferred workflow is to use univariate testing to exclude features that are not significantly different and use OPLS-DA to visualize the differences identified in univariate testing** (Thévenot *et al.*, 2015); an appropriate exception would be to visualize contrasts of a specific list of metabolites.
344
345 It must be stressed that there may be no *single* definitive computational approach to select features that are reliable biomarkers, especially from a small number of samples or experiments. A few possible choices are examining extreme values on S-PLOTs, examining "variable importance in projection VIP for OPLS-DA" (Galindo-Prieto *et al.* 2014), and examining a feature's "selectivity ratio" (Rajalahti *et al.*, 2009). In this spirit, this tool reports the S-PLOT covariance and correlation (Wiklund *op. cit.*) and VIP metrics, and it introduces an informal "salience" metric to flag features that may merit attention without dimensional reduction; future versions may add selectivity ratio.
346
347 For a more systematic approach to biomarker identification, please consider the W4M 'biosigner' tool (Rinaudo *et al.* 2016), which applies three different identification metrics to the selection process.
348
349 Regardless of how any potential biomarker is identified, further validation analysis (e.g., independent confirmatory experiments) is needed before it is recommended for general application.
344 350
345 351
346 W4M Workflow Position 352 W4M Workflow Position
347 --------------------- 353 ---------------------
348 354
350 - Downstream tool categories: **Statistical Analysis** 356 - Downstream tool categories: **Statistical Analysis**
351 357
352 Input files 358 Input files
353 ----------- 359 -----------
354 360
355 +----------------------+-----------+ 361 +----------------------+-----------+
356 | File | Format | 362 | File | Format |
357 +======================+===========+ 363 +======================+===========+
358 | Data matrix | tabular | 364 | Data matrix | tabular |
359 +----------------------+-----------+ 365 +----------------------+-----------+
360 | Sample metadata | tabular | 366 | Sample metadata | tabular |
361 +----------------------+-----------+ 367 +----------------------+-----------+
362 | Variable metadata | tabular | 368 | Variable metadata | tabular |
363 +----------------------+-----------+ 369 +----------------------+-----------+
364 370
365 Output files 371 Output files
366 ------------ 372 ------------
367 373
368 +------------------------------+-----------+ 374 +--------------------------------------------+-----------+
369 | File | Format | 375 | File | Format |
370 +==============================+===========+ 376 +============================================+===========+
371 | Contrast detail | pdf | 377 | Contrast detail | pdf |
372 +------------------------------+-----------+ 378 +--------------------------------------------+-----------+
373 | Contrast cor and cov | tabular | 379 | Contrast "correlation and covariance" data | tabular |
374 +------------------------------+-----------+ 380 +--------------------------------------------+-----------+
375 | Variable metadata supplement | tabular | 381 | Feature "salience" data | tabular |
376 +------------------------------+-----------+ 382 +--------------------------------------------+-----------+
377 383
378 Parameters 384 Parameters
379 ---------- 385 ----------
380 386
381 [IN] Data matrix file 387 [IN] Data matrix file
382 | variable x sample **dataMatrix** (tabular separated values) file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the sample and variable metadata, respectively (see below) 388 | variable x sample **dataMatrix** (tabular separated values) file of the numeric data matrix, with '.' as decimal, and 'NA' for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the sample and variable metadata, respectively (see below)
383 | 389 |
384 390
385 [IN] Sample metadata file 391 [IN] Sample metadata file
386 | sample x metadata **sampleMetadata** (tabular separated values) file of the numeric and/or character sample metadata, with . as decimal and NA for missing values 392 | sample x metadata **sampleMetadata** (tabular separated values) file of the numeric and/or character sample metadata, with '.' as decimal and 'NA' for missing values
387 | 393 |
388 394
389 [IN] Variable metadata file 395 [IN] Variable metadata file
390 | variable x metadata **variableMetadata** (tabular separated values) file of the numeric and/or character variable metadata, with . as decimal and NA for missing values 396 | variable x metadata **variableMetadata** (tabular separated values) file of the numeric and/or character variable metadata, with '.' as decimal and 'NA' for missing values
391 | 397 |
392 398
393 [IN] Test 399 [IN] Test
394 | Name of the **statistical test** - a component of column names in variable metadata table 400 | Name of the **statistical test** - a component of column names in variable metadata table
395 | May be one of 'none', 'ttest', 'wilcoxon', 'anova', 'kruskal', 'pearson', 'spearman' 401 | May be one of 'none', 'ttest', 'wilcoxon', 'anova', 'kruskal', 'pearson', 'spearman'
396 | 402 |
397 403
398 [IN] Factor of interest 404 [IN] Factor of interest
399 | Name of the **column of sampleMetadata** corresponding to the qualitative or quantitative variable 405 | Name of the **column of sampleMetadata** corresponding to the qualitative or quantitative variable
400 | 406 |
401 407
402 [IN] Retain only pairwise-significant features 408 [IN] Retain only pairwise-significant features
403 | When true, for each contrast of two levels, include only those features which pass the significance threshold for that contrast. Choosing true results in an OPLS-DA model that better reflects and visualizes the difference detected by univariate analysis, with somewhat increased reliability of prediction (as assessed by cross-validation). 409 | When **true**, for each contrast of two levels, include only those features which pass the significance threshold for that contrast. Choosing true results in an OPLS-DA model that better reflects and visualizes the difference detected by univariate analysis, with somewhat increased reliability of prediction (as assessed by cross-validation).
404 | When false, include all features that pass the significance threshold when testing for difference across all factor-levels. This choice produces a plot that displays more features but is not necessarily more informative. 410 | When **false**, include all features that pass the significance threshold when testing for difference across all factor-levels. This choice produces a plot that displays more features but is not necessarily more informative.
405 | 411 | *Note that when 'Test' is 'none', all features are included in the analysis and this parameter has no effect.*
412 |
406 413
407 [IN] Levels of interest 414 [IN] Levels of interest
408 | comma-separated **level-names** (or comma-less regular expressions to match level-names) to consider in analysis; must match at least two levels; may include wild cards or regular expressions 415 | Comma-separated **level-names** (or comma-less regular expressions to match level-names) to consider in analysis; must match at least two levels; may include wild cards or regular expressions.
409 | 416 |
410 417
411 [IN] Level-name matching 418 [IN] Level-name matching
412 | How to **specify levels generically** - wild cards, regular expressions, or none (no generic matching) 419 | Indicator of **how levels are to be specified generically** (if at all) - wild cards, regular expressions, or none (no generic matching).
413 | 420 |
414 421
415 [OUT] Contrast-detail output PDF 422 [OUT] Contrast-detail output PDF
416 | Several plots for each two-projection OPLS-DA analysis: 423 | Several plots for each two-projection OPLS-DA analysis:
417 424
418 - (top-left) **correlation-versus-covariance plot** of OPLS-DA results (a work-alike for the S-PLOT, computed using formula in Supplement to Wiklund, *op. cit.*); point-color becomes saturated as the "variable importance in projection to the predictive components" (VIP\ :subscript:`4,p` from Galindo-Prieto *et al.* 2014) ranges from 0.83 to 1.21 (Mehmood *et al.* 2012) 425 - (top-left) **correlation-versus-covariance plot** of OPLS-DA results (a work-alike for the S-PLOT, computed using formula in Supplement to Wiklund, *op. cit.*); point-color becomes saturated as the "variable importance in projection to the predictive components" (VIP\ :subscript:`4,p` from Galindo-Prieto *et al.* 2014) ranges from 0.83 to 1.21 (Mehmood *et al.* 2012)
419 - (bottom-left) **model-overview plot** for the two projections; grey bars are the correlation coefficient for the fitted data; black bars indicate performance in cross-validation tests (Thévenot, 2017) 426 - (bottom-left) **model-overview plot** for the two projections; grey bars are the correlation coefficient for the fitted data; black bars indicate performance in cross-validation tests (Thévenot, 2017)
420 - (top-right) OPLS-DA **scores-plot** for the two projections (Thévenot *et al.*, 2015) 427 - (top-right) OPLS-DA **scores-plot** for the two projections (Thévenot *et al.*, 2015)
421 - (bottom-right) OPLS-DA **loadings-plot** for the two projections (*ibid.*) 428 - (bottom-right) OPLS-DA **loadings-plot** for the two projections (*ibid.*)
422 429
423 [OUT] Contrast Correlation-Covariance data TABULAR 430 [OUT] Contrast Correlation-Covariance data TABULAR
424 | A tab-separated values file having the following columns: 431 | A tab-separated values file of metadata for each feature for each contrast in which it was included.
432 | Thus, a given feature may appear many times, but *the combination of featureID, factorLevel1, and factorLevel2 will be unique.*
433 | This file has the following columns:
425 434
426 - **featureID** - feature-identifier 435 - **featureID** - feature-identifier
427 - **factorLevel1** - factor-level 1 436 - **factorLevel1** - factor-level 1
428 - **factorLevel2** - factor-level 2 (or "other" when contrasting factor-level 1 with all other levels) 437 - **factorLevel2** - factor-level 2 (or "other" when contrasting factor-level 1 with all other levels)
429 - **correlation** - correlation of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in Supplement to Wiklund, *op. cit.*) 438 - **correlation** - correlation of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in Supplement to Wiklund, *op. cit.*)
430 - **covariance** - covariance of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in *ibid.*) 439 - **covariance** - covariance of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in *ibid.*)
431 - **vip4p** - "variable importance in projection" to the predictive components (VIP\ :subscript:`4,p` from Galindo-Prieto *op. cit.*) 440 - **vip4p** - "variable importance in projection" to the predictive projection, VIP\ :subscript:`4,p` (Galindo-Prieto *op. cit.*)
432 - **vip4o** - "variable importance in projection" to the orthogonal components (VIP\ :subscript:`4,o` from Galindo-Prieto *op. cit.*) 441 - **vip4o** - "variable importance in projection" to the orthogonal projection, VIP\ :subscript:`4,o` (*ibid.*)
442 - **loadp** - variable loading for the predictive projection (Wiklund *op. cit.*)
443 - **loado** - variable loading for the orthogonal projection (*ibid.*)
433 - **level1Level2Sig** - (Only present when a test other than "none" is chosen) '1' when feature varies significantly across all classes (i.e., not pair-wise); '0' otherwise 444 - **level1Level2Sig** - (Only present when a test other than "none" is chosen) '1' when feature varies significantly across all classes (i.e., not pair-wise); '0' otherwise
434 445
435 [OUT] Feature "Salience" data TABULAR 446 [OUT] Feature "Salience" data TABULAR
436 | Metrics for the "salient level" for each feature, i.e., the level at which the feature is more prominent than any other level. This is *not* at all related to the SIMCA OPLS-DA S-PLOT; rather, it is intended as a potential (and unproven) way to identify features that may suggest potential biomarkers without dimensional reduction of data. This is a tab-separated values file having the following columns: 447 | Metrics for the "salient level" for each feature, i.e., the level at which the feature is more prominent than any other level. This is *not* at all related to the SIMCA OPLS-DA S-PLOT; rather, it is intended as a potential (and unproven) way to identify features that may suggest potential biomarkers without dimensional reduction of data. This is a tab-separated values file having the following columns:
437 448
438 - **featureID** - feature identifier 449 - **featureID** - feature identifier
439 - **salientLevel** - salient level, i.e., for the feature, the class-level having the greatest median intensity 450 - **salientLevel** - salient level, i.e., for the feature, the class-level having the greatest median intensity
440 - **salientRCV** - salient robust coefficient of variation, i.e., for the feature, the mean absolute deviation of the intensity for the salient level divided by the median intensity for the salient level 451 - **salientRCV** - salient robust coefficient of variation, i.e., for the feature, the mean absolute deviation of the intensity for the salient level divided by the median intensity for the salient level
441 - **salience** - salience, i.e., for the feature, the median of the class-level having the greatest intensity divided by the mean of the medians for all class-levels 452 - **salience** - salience, i.e., for the feature, the median of the class-level having the greatest intensity divided by the mean of the medians for all class-levels
467 478
468 - '``^``' matches the beginning of a level-name 479 - '``^``' matches the beginning of a level-name
469 - '``$``' matches the end of a level-name 480 - '``$``' matches the end of a level-name
470 - '``.``' outside of square brackets matches a single character 481 - '``.``' outside of square brackets matches a single character
471 - '``*``' matches character specified immediately before zero or more times 482 - '``*``' matches character specified immediately before zero or more times
472 - square brackets specify a set of characters to be matched. 483 - Square brackets specify a set of characters to be matched. Within square brackets:
473 484
474 Within square brackets 485 - '``^``' as the first character specifies that the list of characters are those that should **not** be matched.
475 486 - '``-``' is used to specify ranges of characters
476 - '``^``' as the first character specifies that the list of characters are those that should **not** be matched. 487
477 - '``-``' is used to specify ranges of characters 488 Caveat: The tool wrapper uses the comma ('``,``') to split a list of sample-level names, so **commas may not be used within regular expressions for this tool.**
478
479 Caveat: The tool wrapper uses the comma ('``,``') to split a list of sample-level names, so **commas may not be used within regular expressions for this tool**
480 489
481 First Example: Consider a field of level-names consisting of '``marq3,marq6,marq9,marq12,front3,front6,front9,front12``' 490 First Example: Consider a field of level-names consisting of '``marq3,marq6,marq9,marq12,front3,front6,front9,front12``'
482 491
483 - The regular expression '``^front[0-9][0-9]*$``' will match the same sample-levels as '``front3,front6,front9,front12``' 492 - The regular expression '``^front[0-9][0-9]*$``' will match the same sample-levels as '``front3,front6,front9,front12``'
484 - The regular expression '``^[a-z][a-z]3$``' will match the same sample-levels as '``front3,marq3``' 493 - The regular expression '``^[a-z][a-z]3$``' will match the same sample-levels as '``front3,marq3``'
492 - '``^[A-Z][0-9]*``' - MATCHES '``**^A** B0123$``' - first character is a letter, '``*``' can specify zero characters, and end of line did not need to be matched. 501 - '``^[A-Z][0-9]*``' - MATCHES '``**^A** B0123$``' - first character is a letter, '``*``' can specify zero characters, and end of line did not need to be matched.
493 - '``^[A-Z][A-Z][0-9]``' - MATCHES '``**^AB0** 123$``' - first two characters are letters and the third is a digit. 502 - '``^[A-Z][A-Z][0-9]``' - MATCHES '``**^AB0** 123$``' - first two characters are letters and the third is a digit.
494 - '``^[A-Z][A-Z]*[0-9][0-9]$``' - NO MATCH - the name does not end with the pattern '``[A-Z][0-9][0-9]$``', i.e., it ends with four digits, not two. 503 - '``^[A-Z][A-Z]*[0-9][0-9]$``' - NO MATCH - the name does not end with the pattern '``[A-Z][0-9][0-9]$``', i.e., it ends with four digits, not two.
495 - '``^[A-Z][0-9]*$``' - NO MATCH - the pattern specifies that second character and all those that follow, if present, must be digits. 504 - '``^[A-Z][0-9]*$``' - NO MATCH - the pattern specifies that second character and all those that follow, if present, must be digits.
496 505
497 Working example 506 Working examples
498 --------------- 507 ----------------
499 508
500 **Input files** 509 **Input files**
501 510
502 +-------------------+-------------------------------------------------------------------------------------------------------------------+ 511 +-------------------+-------------------------------------------------------------------------------------------------------------------+
503 | Input File | Download from URL | 512 | Input File | Download from URL |
504 +===================+===================================================================================================================+ 513 +===================+===================================================================================================================+
505 | Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/input_dataMatrix.tsv | 514 | Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/input_dataMatrix.tsv |
506 +-------------------+-------------------------------------------------------------------------------------------------------------------+ 515 +-------------------+-------------------------------------------------------------------------------------------------------------------+
507 | Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/input_sampleMetadata.tsv | 516 | Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/input_sampleMetadata.tsv |
508 +-------------------+-------------------------------------------------------------------------------------------------------------------+ 517 +-------------------+-------------------------------------------------------------------------------------------------------------------+
509 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/input_variableMetadata.tsv | 518 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/input_variableMetadata.tsv |
510 +-------------------+-------------------------------------------------------------------------------------------------------------------+ 519 +-------------------+-------------------------------------------------------------------------------------------------------------------+
511 520
512 +-------------------------------------------+--------------------------------------------------+ 521 **Example 1:** Include in the analysis only features identified as pair-wise significant in the Univariate test.
513 | Input Parameter | Input value | 522
514 +===========================================+==================================================+ 523 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
515 | Factor of interest | k10 | 524 | Input Parameter or Result | Value |
516 +-------------------------------------------+--------------------------------------------------+ 525 +============================================+========================================================================================================================================+
517 | Univariate Significance-Test | kruskal | 526 | Factor of interest | k10 |
518 +-------------------------------------------+--------------------------------------------------+ 527 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
519 | Retain only pairwise-significant features | Yes | 528 | Univariate Significance-Test | kruskal |
520 +-------------------------------------------+--------------------------------------------------+ 529 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
521 | Levels of interest | k[12],k[3-4] | 530 | Retain only pairwise-significant features | Yes |
522 +-------------------------------------------+--------------------------------------------------+ 531 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
523 | Level-name matching | use regular expressions for matching level-names | 532 | Levels of interest | k[12],k[3-4] |
524 +-------------------------------------------+--------------------------------------------------+ 533 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
525 | Label features on detail plot | Yes | 534 | Level-name matching | use regular expressions for matching level-names |
526 +-------------------------------------------+--------------------------------------------------+ 535 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
527 536 | Number of features having extreme loadings | ALL |
528 +-------------------+---------------------------------------------------------------------------------------------------------------------+ 537 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
529 | Expected Output | Download from URL | 538 | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov.tsv |
530 +===================+=====================================================================================================================+ 539 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
531 | Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov.tsv | 540 | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience.tsv |
532 +-------------------+---------------------------------------------------------------------------------------------------------------------+ 541 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
542 | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_detail.pdf |
543 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
544
545 **Example 2:** Include in the analysis only features identified as overall-significant in the Univariate test. Note that this even includes these features in contrasts where they were not determined to be pair-wise significant in the Univariate test. Thus, more features are included than in Example 1.
546
547 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
548 | Input Parameter or Result | Value |
549 +============================================+========================================================================================================================================+
550 | Factor of interest | k10 |
551 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
552 | Univariate Significance-Test | kruskal |
553 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
554 | Retain only pairwise-significant features | No |
555 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
556 | Levels of interest | ``*`` |
557 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
558 | Level-name matching | use wild cards for matching level-names |
559 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
560 | Number of features having extreme loadings | 5 |
561 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
562 | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov_all.tsv |
563 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
564 | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience_all.tsv |
565 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
566 | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_detail_all.pdf |
567 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
568
569 **Example 3:** Include all features in the analysis without regard to Univariate testing. Univariate testing is not even a prerequisite to using the tool when 'none' is selected for the test. Thus, more features are included than in Example 2.
570
571 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
572 | Input Parameter or Result | Value |
573 +============================================+========================================================================================================================================+
574 | Factor of interest | k10 |
575 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
576 | Univariate Significance-Test | none |
577 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
578 | Retain only pairwise-significant features | No |
579 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
580 | Levels of interest | k[12],k[3-4] |
581 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
582 | Level-name matching | use regular expressions for matching level-names |
583 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
584 | Number of features having extreme loadings | 0 |
585 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
586 | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov_global.tsv |
587 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
588 | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience_global.tsv |
589 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
590 | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_detail_global.pdf |
591 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
592
593 **Example 4:** Analysis of a two-level factor (including all features). This suppresses the contrasts of "each level vs. the aggregate of all the others".
594
595 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
596 | Input Parameter or Result | Value |
597 +============================================+========================================================================================================================================+
598 | Factor of interest | lohi |
599 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
600 | Univariate Significance-Test | none |
601 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
602 | Retain only pairwise-significant features | No |
603 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
604 | Levels of interest | low,high |
605 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
606 | Level-name matching | use regular expressions for matching level-names |
607 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
608 | Number of features having extreme loadings | 3 |
609 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
610 | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov_lohi.tsv |
611 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
612 | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience_lohi.tsv |
613 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
614 | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_detail_lohi.pdf |
615 +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
533 616
534 617
535 Trademarks 618 Trademarks
536 ---------- 619 ----------
537 620
539 622
540 623
541 Release notes 624 Release notes
542 ------------- 625 -------------
543 626
544 v0.98.2 - first release 627 0.98.3
628
629 - add support for two-level factors
630 - add adjusted mz and rt to output tables
631 - allow explicitly setting the number of features with extreme loadings to be labeled on the correlation vs. covariance plot
632 - add loadings to corcov table
633
634 0.98.2
635
636 - first release
545 637
546 638
547 ]]></help> 639 ]]></help>
548 <citations> 640 <citations>
549 <!-- Galindo_Prieto_2014 Variable influence on projection (VIP) for OPLS --> 641 <!-- Galindo_Prieto_2014 Variable influence on projection (VIP) for OPLS -->
554 <citation type="doi">10.1016/j.biocel.2017.07.002</citation> 646 <citation type="doi">10.1016/j.biocel.2017.07.002</citation>
555 <!-- Mehmood_2012 PLS-based variable-selection --> 647 <!-- Mehmood_2012 PLS-based variable-selection -->
556 <citation type="doi">10.1186/1748-7188-6-27</citation> 648 <citation type="doi">10.1186/1748-7188-6-27</citation>
557 <!-- Rajalahti_2009 Biomarker discovery using selectivity ratio --> 649 <!-- Rajalahti_2009 Biomarker discovery using selectivity ratio -->
558 <citation type="doi">10.1016/j.chemolab.2008.08.004</citation> 650 <citation type="doi">10.1016/j.chemolab.2008.08.004</citation>
651 <!-- Rinaudo 2016 -->
652 <citation type="doi">10.3389/fmolb.2016.00026</citation>
559 <!-- Sun_2016 Urinary Biomarkers for adolescent idiopathic scoliosis --> 653 <!-- Sun_2016 Urinary Biomarkers for adolescent idiopathic scoliosis -->
560 <citation type="doi">10.1038/srep22274</citation> 654 <citation type="doi">10.1038/srep22274</citation>
561 <!-- Thevenot_2015 Urinary metabolome statistics --> 655 <!-- Thevenot_2015 Urinary metabolome statistics -->
562 <citation type="doi">10.1021/acs.jproteome.5b00354</citation> 656 <citation type="doi">10.1021/acs.jproteome.5b00354</citation>
563 <!-- ropls package --> 657 <!-- ropls package -->
564 <citation type="bibtex"><![CDATA[ 658 <citation type="bibtex"><![CDATA[
565 @incollection{Thevenot_ropls_2017, 659 @incollection{Thevenot_ropls_2017,
566 author = {Th{\'{e}}venot, Etienne A.}, 660 author = {Th{\'{e}}venot, Etienne A.},
567 title = {ropls: PCA, PLS(-DA) and OPLS(-DA) for multivariate analysis and feature selection of omics data}, 661 title = {ropls: PCA, PLS(-DA) and OPLS(-DA) for multivariate analysis and feature selection of omics data},
568 publisher = {bioconductor.org}, 662 publisher = {bioconductor.org},
569 year = {2017}, 663 year = {2017},
570 doi = {10.18129/B9.bioc.ropls}, 664 doi = {10.18129/B9.bioc.ropls},
571 booktitle = {Bioconductor: Open source software for bioinformatics}, 665 booktitle = {Bioconductor: Open source software for bioinformatics},
572 address = {Roswell Park Cancer Institute}, 666 address = {Roswell Park Cancer Institute},
573 } 667 }
574 ]]></citation> 668 ]]></citation>
575 <!-- Wiklund_2008 OPLS PLS-DA and S-PLOT --> 669 <!-- Wiklund_2008 OPLS PLS-DA and S-PLOT -->
576 <citation type="doi">10.1021/ac0713510</citation> 670 <citation type="doi">10.1021/ac0713510</citation>
577 </citations> 671 </citations>
578 <!-- 672 <!--