view w4mcorcov.xml @ 14:90708fdbc22d draft default tip

"planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 5fd9687d543a48a715b1180caf93abebebd58b0e"
author eschen42
date Wed, 18 Nov 2020 18:53:37 +0000
parents 2ae2d26e3270
children
line wrap: on
line source

<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.18">
    <description>OPLS-DA Contrasts of Univariate Results</description>
    <macros>
        <!--
          paramPairSigFeatOnly: boolean toggle expanded under every univariate-test choice of the
          'signif_test' conditional other than 'none'.  Renders as TRUE or FALSE on the command line.
        -->
        <xml name="paramPairSigFeatOnly">
            <param name="pairSigFeatOnly" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
                label="Retain only pairwise-significant features"
                help="When this option is set to 'Yes', analysis will be performed including only features scored by the univariate test as differing significantly for the pair of levels being contrasted; when set to 'No', any feature that varies significantly across all levels will be included (i.e., exclude only features not scored by the univariate test as significantly varying when all levels are considered).  See examples below." />
        </xml>
        <!--
          cplots: parameters expanded under the 'advanced' choice of the 'advanced' conditional:
            * cplot_y      - quantity plotted on the C-plot Y-axis (correlation or covariance)
            * cplot_p      - whether to draw the predictor C-plot
            * cplot_o      - whether to draw the orthogonal C-plot
            * fdr_features - feature count used for p-value adjustment, or 'ALL'
          The cplot_p and cplot_o help texts refer to the Y-axis choice rather than to correlation
          alone, because cplot_y also offers covariance.
        -->
        <xml name="cplots">
            <param name="cplot_y" label="C-plot Y-axis" type="select" help="Choose the Y-axis for C-plots.">
                <option value="correlation">Plot VIP versus correlation</option>
                <option value="covariance">Plot VIP versus covariance</option>
            </param>
            <param name="cplot_p" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
                label="Produce predictor C-plot"
                help="When this option is set to 'Yes', the quantity chosen for the C-plot Y-axis (correlation or covariance) will be plotted against vip4 for predictor loadings." />
            <param name="cplot_o" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
                label="Produce orthogonal C-plot"
                help="When this option is set to 'Yes', the quantity chosen for the C-plot Y-axis (correlation or covariance) will be plotted against vip4 for orthogonal loadings." />
            <param name="fdr_features" type="text" value="ALL"
                label="How many features for p-value calculation?"
                help="Specify how many features should be used to perform family-wise error rate adjustment of p-values for covariance and correlation.  If you were to eliminate features from the data matrix based on significance criteria prior to running this tool, you would want to include them in the count here to avoid underestimating the p-value.  Specify 'ALL' to signify that all features that could impact p-value calculation are included in the data matrix."/>
        </xml>
    </macros>
    <!--
      Package dependencies for the Rscript invocation in the command section.
      NOTE(review): r-base and r-batch declare no version, unlike bioconductor-ropls;
      consider pinning them for reproducible dependency resolution once compatible
      versions have been confirmed.
    -->
    <requirements>
        <requirement type="package">r-base</requirement>
        <requirement type="package">r-batch</requirement>
        <requirement type="package" version="1.22.0">bioconductor-ropls</requirement>
    </requirements>
    <!--
      Command template (Cheetah): runs w4mcorcov_wrapper.R from the tool directory, passing
      arguments as whitespace-separated name/value pairs with every value in single quotes.
        * When the univariate test is 'none', tesC and pairSigFeatOnly are hard-coded to
          'none' and 'FALSE'; that branch of the signif_test conditional defines no
          pairSigFeatOnly parameter.
        * When the advanced selector is 'none', fdr_features, cplot_p, cplot_o and cplot_y are
          hard-coded to 'ALL', 'FALSE', 'FALSE' and 'correlation'; otherwise the values come
          from the parameters of the 'advanced' conditional.
        * contrast_detail, contrast_corcov and contrast_salience receive the declared outputs.
    -->
    <command detect_errors="aggressive"><![CDATA[
    Rscript '$__tool_directory__/w4mcorcov_wrapper.R'
        dataMatrix_in '$dataMatrix_in'
        sampleMetadata_in '$sampleMetadata_in'
        variableMetadata_in '$variableMetadata_in'
        facC '$facC'
        #if str( $signif_test.tesC ) == 'none':
            tesC 'none'
            pairSigFeatOnly 'FALSE'
        #else:
            tesC '$signif_test.tesC'
            pairSigFeatOnly '$signif_test.pairSigFeatOnly'
        #end if
        levCSV '$levCSV'
        matchingC '$matchingC'
        labelFeatures '$labelFeatures'
        #if str( $advanced.advancedFeatures ) == 'none':
            fdr_features 'ALL'
            cplot_p 'FALSE'
            cplot_o 'FALSE'
            cplot_y 'correlation'
        #else if str( $advanced.advancedFeatures ) == 'advanced':
            fdr_features '$advanced.fdr_features'
            cplot_p '$advanced.cplot_p'
            cplot_o '$advanced.cplot_o'
            cplot_y '$advanced.cplot_y'
        #end if
        contrast_detail '$contrast_detail'
        contrast_corcov '$contrast_corcov'
        contrast_salience '$contrast_salience'
        min_crossval_i '$min_crossval_i'
    ]]></command>
    <inputs>
        <!-- The three tabular input datasets: data matrix, sample metadata and variable metadata. -->
        <param name="dataMatrix_in" type="data" format="tabular"
            label="Data matrix file"
            help="variables &#10006; samples" />
        <param name="sampleMetadata_in" type="data" format="tabular"
            label="Sample metadata file"
            help="sample metadata, one row per sample" />
        <param name="variableMetadata_in" type="data" format="tabular"
            label="Variable metadata file (ideally from Univariate)"
            help="variable metadata, one row per variable" />
        <!--
          Name of the sampleMetadata column that defines the contrasts.  The help text marks
          this field REQUIRED, so the empty_field validator rejects a blank value at form
          validation.  The sanitizer admits only letters, digits, dot and underscore.
        -->
        <param name="facC" type="text"
            label="Factor of interest"
            help="REQUIRED - The name of the column of sampleMetadata corresponding to the qualitative variable used to define the contrasts.  Except when the 'Univariate Significance-test' is set to 'none', this also must be a portion of the column names in the variableMetadata file.">
            <validator type="empty_field" message="Factor of interest is required: enter the name of a sampleMetadata column." />
            <sanitizer>
                <valid initial="string.letters">
                    <add preset="string.digits"/>
                    <add value="&#46;"    /> <!-- dot, period -->
                    <add value="&#95;"    /> <!-- underscore -->
                    <!-- R does not permit dashes in column names; neither does SQL -->
                </valid>
            </sanitizer>
        </param>
        <!--
          Selects the univariate test whose results are expected in variableMetadata.
          'none' exposes no further parameter; each of the four named tests expands the same
          paramPairSigFeatOnly macro, so pairSigFeatOnly is defined only for those choices.
        -->
        <conditional name="signif_test">
            <param name="tesC" label="Univariate significance-test" type="select" help="Either 'none' or the name of the statistical test that was run by the 'Univariate' tool to produce the variableMetadata file.">
                <option value="none">none - Display all features from variableMetadata (rather than choosing a subset based on significance in univariate testing)</option>
                <option value="ttest">ttest - Student's t-test (parametric test, qualitative factor with exactly 2 levels)</option>
                <option value="anova">anova - Analysis of variance (parametric test, qualitative factor with more than 2 levels)</option>
                <option value="wilcoxon">wilcoxon - Wilcoxon rank test (nonparametric test, qualitative factor with exactly 2 levels)</option>
                <option value="kruskal">kruskal - Kruskal-Wallis rank test (nonparametric test, qualitative factor with more than 2 levels)</option>
            </param>
            <!-- no extra parameters when no univariate test is selected -->
            <when value="none" />
            <when value="ttest">
                <expand macro="paramPairSigFeatOnly" />
            </when>
            <when value="anova">
                <expand macro="paramPairSigFeatOnly" />
            </when>
            <when value="wilcoxon">
                <expand macro="paramPairSigFeatOnly" />
            </when>
            <when value="kruskal">
                <expand macro="paramPairSigFeatOnly" />
            </when>
        </conditional>
        <!--
          Comma-separated level names or patterns, defaulting to '*'.  The sanitizer whitelist
          below admits letters, digits and the punctuation needed for wild cards and regular
          expressions; quote characters are deliberately excluded.
        -->
        <param name="levCSV" type="text" value="*" label="Levels of interest" 
            help="Comma-separated level-names (or comma-separated regular expressions to match level-names) to consider in analysis; must match at least two levels; levels must be non-numeric; may include wild cards or regular expressions.  Note that extra space characters will affect results - when 'a,b' is correct, 'a, b' is not equivalent and likely will fail or give different results.">
            <sanitizer>
                <valid initial="string.letters">
                    <add preset="string.digits"/>
                    <add value="&#36;"  /> <!-- $ dollar, dollar-sign -->
                    <add value="&#40;"  /> <!-- ( left-paren -->
                    <add value="&#41;"  /> <!-- ) right-paren -->
                    <add value="&#42;"  /> <!-- * splat, asterisk -->
                    <add value="&#43;"  /> <!-- + plus -->
                    <add value="&#44;"  /> <!-- , comma -->
                    <add value="&#45;"  /> <!-- - dash, minus-sign -->
                    <add value="&#46;"  /> <!-- . dot, period -->
                    <add value="&#58;"  /> <!-- : colon -->
                    <add value="&#59;"  /> <!-- ; semi, semicolon -->
                    <add value="&#63;"  /> <!-- ? what, question mark -->
                    <add value="&#91;"  /> <!-- [ l-squib, left-square-bracket -->
                    <add value="&#92;"  /> <!-- \ whack, backslash -->
                    <add value="&#93;"  /> <!-- ] r-squib, right-square-bracket -->
                    <add value="&#94;"  /> <!-- ^ hat, caret -->
                    <add value="&#95;"  /> <!-- underscore -->
                    <add value="&#123;" /> <!-- { l-cube, left-curly-bracket -->
                    <add value="&#124;" /> <!-- | pipe -->
                    <add value="&#125;" /> <!-- } r-cube, right-curly-bracket -->
                    <!-- IMPORTANT - Note that single and double quotes are not part of this list; they have the potential to make the 'command' section insecure or broken. -->
                </valid>
            </sanitizer>
        </param>
        <!-- How the entries of levCSV are matched against level names; wildcard is preselected. -->
        <param name="matchingC" type="select"
            label="Level-name matching"
            help="How to specify level-names generically. (See help below for details on using wild cards or regular expressions.)">
            <option value="none">do no generic matching</option>
            <option selected="true" value="wildcard">use wild-cards for matching level-names (default)</option>
            <option value="regex">use regular expressions for matching level-names</option>
        </param>
        <!-- Number of extreme-loading features to label on the cov-vs.-cor plot; free text so that 'ALL' is accepted. -->
        <param
            name="labelFeatures"
            type="text"
            value="3"
            label="How many features having extreme loadings should be labelled on cov-vs.-cor plot?"
            help="Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features or '0' to label no features; this choice has no effect on the OPLS-DA loadings plot."
        />
        <!-- Minimum sample count for OPLS-DA cross-validation; defaults to 7. -->
        <param name="min_crossval_i" type="text" value="7"
            label="Minimum number of samples for OPLS-DA cross-validation."
            help="What is the minimum number of samples to be used by the ropls package for cross-validation of OPLS-DA predictions?  This should be not more than half the number of your samples."/>
        <!--
          Advanced options.  'advanced' is listed first and so is the default selection; it
          expands the cplots macro.  'none' exposes no further parameter.
        -->
        <conditional name="advanced">
            <param name="advancedFeatures" type="select"
                label="Advanced (C-plots and customized p-value adjustment)"
                help="Choose 'Do not include ...' to hide further choices.">
                <option value="advanced">Include C-plots and customize p-value adjustment.</option>
                <option value="none">Do not include additional C-plots or customize p-value adjustment.</option>
            </param>
            <when value="none" />
            <when value="advanced">
                <expand macro="cplots" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!--
          contrast_detail (PDF): a summary of each contrast, clearly labelled by level-pair name,
          presenting in order:
            1. the PCA score-plot
            2. the OPLS score-plot
            3. the OPLS S-PLOT, whose color saturation increases with VIP
            4. the C-plots, if requested
        -->
        <data format="pdf" name="contrast_detail" label="${tool.name}_${variableMetadata_in.name}_detail" />
        <!--
          contrast_corcov (tabular): correlation and covariance table with columns:
            * feature-ID
            * factor-level 1
            * factor-level 2, lexically greater than level 1
            * Wiklund_2008 correlation
            * Wiklund_2008 covariance
            * Galindo_Prieto_2014 VIP for predictive components, VIP[4,p]
            * Galindo_Prieto_2014 VIP for orthogonal components, VIP[4,o]
            * when filtering on significance of univariate tests, the significance of the
              pair-wise test, i.e., the test of the null hypothesis that there is no
              difference between the two classes
        -->
        <data format="tabular" name="contrast_corcov" label="${tool.name}_${variableMetadata_in.name}_corcov" />
        <!--
          contrast_salience (tabular, experimental feature): salience table with columns:
            * feature-ID
            * salient level: for the feature, the class-level having the greatest median intensity
            * salient robust coefficient of variation: for the feature, the mean absolute
              deviation of the intensity for the salient level divided by the median intensity
              for the salient level
            * salience: for the feature, the median of the class-level having the greatest
              intensity divided by the mean of the medians for all class-levels
        -->
        <data format="tabular" name="contrast_salience" label="${tool.name}_${variableMetadata_in.name}_salience" />
    </outputs>
  <tests>
    <!--
      test #1 - issue 14
      Uses the issue14 input files with no univariate filtering (tesC 'none'), factor
      'tissue_flowering', wildcard '*' for the levels, and min_crossval_i set to 4 instead of
      the default 7.  Only the corcov output is checked.
    -->
    <test>
      <param name="dataMatrix_in" value="issue14_input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="issue14_input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="issue14_input_variableMetadata.tsv"/>
      <param name="tesC" value="none"/>
      <param name="facC" value="tissue_flowering"/>
      <param name="labelFeatures" value="3"/>
      <param name="min_crossval_i" value="4"/>
      <param name="fdr_features" value="ALL"/>
      <param name="levCSV" value="*"/>
      <param name="matchingC" value="wildcard"/>
      <output name="contrast_corcov">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="factorLevel1" />
          <has_text text="factorLevel2" />
          <has_text text="correlation" />
          <has_text text="covariance" />
          <has_text text="vip4p" />
          <has_text text="vip4o" />
          <!-- a matched line -->
          <has_text text="NM516T283_1" />
          <has_text text="flower_yes" />
          <has_text text="leaf_no" />
          <has_text text="-0.98475578586" />
          <has_text text="-58.1219648" />
          <has_text text="2.0103501" />
          <has_text text="2.872672881" />
          <has_text text="-0.1208407903" />
          <has_text text="-0.2032249" />
          <has_text text="-0.9857575" />
          <has_text text="-0.983684189899" />
          <has_text text="516.080116" />
          <has_text text="282.50076" />
        </assert_contents>
      </output>
    </test>
    <!--
      test #2 - issue 6
      Uses the issue6 sample metadata with factor 'k._10' and the literal level names
      'k_3' and 'k-4' (matchingC 'none', so no generic matching), without univariate
      filtering.  Only the corcov output is checked.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="issue6_input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="tesC" value="none"/>
      <param name="facC" value="k._10"/>
      <param name="labelFeatures" value="3"/>
      <param name="min_crossval_i" value="7"/>
      <param name="fdr_features" value="ALL"/>
      <param name="levCSV" value="k_3,k-4"/>
      <param name="matchingC" value="none"/>
      <output name="contrast_corcov">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="factorLevel1" />
          <has_text text="factorLevel2" />
          <has_text text="correlation" />
          <has_text text="covariance" />
          <has_text text="vip4p" />
          <has_text text="vip4o" />
          <!-- first matched line -->
          <has_text text="M349.2383T700" />
          <has_text text="-0.1221966" />
          <has_text text="-917311734" />
          <has_text text="0.0304592" />
          <has_text text="0.104748883" />
          <has_text text="-0.002736415" />
          <has_text text="-0.0113968" />
          <has_text text="0.387723" />
          <has_text text="-0.3812168081" />
          <has_text text="0.154611878" />
        </assert_contents>
      </output>
    </test>
    <!--
      test #3 - issue 8
      Same parameters as the issue 6 test but with the issue8 sample metadata; additionally
      asserts that the texts 'k1' and 'other' do not appear in the corcov output.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="issue8_input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="tesC" value="none"/>
      <param name="facC" value="k._10"/>
      <param name="labelFeatures" value="3"/>
      <param name="min_crossval_i" value="7"/>
      <param name="fdr_features" value="ALL"/>
      <param name="levCSV" value="k_3,k-4"/>
      <param name="matchingC" value="none"/>
      <output name="contrast_corcov">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="factorLevel1" />
          <has_text text="factorLevel2" />
          <has_text text="correlation" />
          <has_text text="covariance" />
          <has_text text="vip4p" />
          <has_text text="vip4o" />
          <!-- k1 rejected by levCSV, leaving only k_3 and k-4 -->
          <not_has_text text="k1" />
          <not_has_text text="other" />
          <!-- first matched line -->
          <has_text text="M200.005T296" />
          <has_text text="-0.1829149760" />
          <has_text text="-115723402" />
          <has_text text="0.0892595" />
          <has_text text="0.00492288" />
          <has_text text="-0.00801895" />
          <has_text text="0.0005356178" />
          <has_text text="0.1848186" />
          <has_text text="-0.428802311" />
          <has_text text="0.0882045811" />
        </assert_contents>
      </output>
    </test>
    <!--
      test #4
      Kruskal-Wallis filtering on factor 'k10' with pairSigFeatOnly FALSE, fdr_features set
      to 250, and regular-expression level matching ('k[12],k[3-4]').  Checks both the
      corcov output (including the level1Level2Sig column label) and the salience output.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="tesC" value="kruskal"/>
      <param name="facC" value="k10"/>
      <param name="pairSigFeatOnly" value="FALSE"/>
      <param name="labelFeatures" value="3"/>
      <param name="min_crossval_i" value="7"/>
      <param name="fdr_features" value="250"/>
      <param name="levCSV" value="k[12],k[3-4]"/>
      <param name="matchingC" value="regex"/>
      <output name="contrast_corcov">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="factorLevel1" />
          <has_text text="factorLevel2" />
          <has_text text="correlation" />
          <has_text text="covariance" />
          <has_text text="vip4p" />
          <has_text text="vip4o" />
          <has_text text="level1Level2Sig" />
          <!-- first matched line -->
          <has_text text="M349.2383T700" />
          <has_text text="-0.49037231902" />
          <has_text text="-2111932280.94" />
          <has_text text="0.4914638" />
          <has_text text="0.01302117" />
          <has_text text="-0.049216260" />
          <has_text text="-0.00152098716" />
          <has_text text="2.0603074801" />
          <has_text text="-0.60020597" />
          <has_text text="-0.3623876130" />
          <!-- second matched line -->
          <has_text text="M207.9308T206" />
          <has_text text="0.504885262" />
          <has_text text="293403792" />
          <has_text text="0.207196379" />
          <has_text text="0.04438632" />
          <has_text text="0.020749097" />
          <has_text text="0.005184709" />
          <has_text text="1.47082346" />
          <has_text text="2.24325407" />
          <has_text text="0.38157919" />
          <has_text text="0.610536188" />
        </assert_contents>
      </output>
      <output name="contrast_salience">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="salientLevel" />
          <has_text text="salienceRCV" />
          <has_text text="salience" />
          <!-- first three matched lines -->
          <has_text text="M207.0654T373" /><has_text text="k4" /><has_text text="0.822733190" /><has_text text="134.087771" /><has_text text="3.9994434" /><has_text text="207.0654" /><has_text text="373" />
          <has_text text="M222.9585T226" /><has_text text="k2" /><has_text text="0.761200229" /><has_text text="87.3672719" /><has_text text="3.9995358" /><has_text text="222.9585" /><has_text text="226" />
          <has_text text="M235.0975T362" /><has_text text="k4" /><has_text text="0.209363850" /><has_text text="77.6255643" /><has_text text="3.99606600" /><has_text text="235.0975" /><has_text text="362" />
        </assert_contents>
      </output>
    </test>
    <!--
      test #5
      Kruskal-Wallis filtering on factor 'k10' with pairSigFeatOnly TRUE, fdr_features 'ALL',
      and regular-expression level matching ('k[12],k[3-4]').  Checks both the corcov and
      the salience outputs.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="tesC" value="kruskal"/>
      <param name="facC" value="k10"/>
      <param name="pairSigFeatOnly" value="TRUE"/>
      <param name="labelFeatures" value="3"/>
      <param name="min_crossval_i" value="7"/>
      <param name="fdr_features" value="ALL"/>
      <param name="levCSV" value="k[12],k[3-4]"/>
      <param name="matchingC" value="regex"/>
      <output name="contrast_corcov">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="factorLevel1" />
          <has_text text="factorLevel2" />
          <has_text text="correlation" />
          <has_text text="covariance" />
          <has_text text="vip4p" />
          <has_text text="vip4o" />
          <has_text text="level1Level2Sig" />
          <!-- first matched line -->
          <has_text text="M200.005T296" />
          <has_text text="0.0050579682" />
          <has_text text="2607493" />
          <has_text text="0.1157346" />
          <has_text text="0.0647860" />
        </assert_contents>
      </output>
      <output name="contrast_salience">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="salientLevel" />
          <has_text text="salienceRCV" />
          <has_text text="salience" />
          <!-- first three matched lines -->
          <has_text text="M207.0654T373" /><has_text text="k4" /><has_text text="0.822733190" /><has_text text="134.087771" /><has_text text="3.9994434" /><has_text text="207.0654" /><has_text text="373" />
          <has_text text="M222.9585T226" /><has_text text="k2" /><has_text text="0.761200229" /><has_text text="87.3672719" /><has_text text="3.9995358" /><has_text text="222.9585" /><has_text text="226" />
          <has_text text="M235.0975T362" /><has_text text="k4" /><has_text text="0.209363850" /><has_text text="77.6255643" /><has_text text="3.99606600" /><has_text text="235.0975" /><has_text text="362" />
        </assert_contents>
      </output>
    </test>
    <!--
      test #6
      Same inputs, factor and regular-expression levels as the two kruskal tests, but with
      no univariate filtering (tesC 'none').  Checks both the corcov and the salience outputs.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="tesC" value="none"/>
      <param name="facC" value="k10"/>
      <param name="labelFeatures" value="3"/>
      <param name="min_crossval_i" value="7"/>
      <param name="fdr_features" value="ALL"/>
      <param name="levCSV" value="k[12],k[3-4]"/>
      <param name="matchingC" value="regex"/>
      <output name="contrast_corcov">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="factorLevel1" />
          <has_text text="factorLevel2" />
          <has_text text="correlation" />
          <has_text text="covariance" />
          <has_text text="vip4p" />
          <has_text text="vip4o" />
          <!-- first matched line -->
          <has_text text="M349.2383T700" />
          <has_text text="-0.499225" />
          <has_text text="-2135165209" />
          <has_text text="0.5246766" />
          <has_text text="0.0103341" />
          <!-- second matched line -->
          <has_text text="M207.9308T206" />
          <has_text text="0.4927151212" />
          <has_text text="284608538" />
          <has_text text="0.2111623" />
          <has_text text="0.0488654" />
        </assert_contents>
      </output>
      <output name="contrast_salience">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="salientLevel" />
          <has_text text="salienceRCV" />
          <has_text text="salience" />
          <!-- first three matched lines -->
          <has_text text="M207.0654T373" /><has_text text="k4" /><has_text text="0.822733190" /><has_text text="134.087771" /><has_text text="3.9994434" /><has_text text="207.0654" /><has_text text="373" />
          <has_text text="M222.9585T226" /><has_text text="k2" /><has_text text="0.761200229" /><has_text text="87.3672719" /><has_text text="3.9995358" /><has_text text="222.9585" /><has_text text="226" />
          <has_text text="M235.0975T362" /><has_text text="k4" /><has_text text="0.209363850" /><has_text text="77.6255643" /><has_text text="3.99606600" /><has_text text="235.0975" /><has_text text="362" />
        </assert_contents>
      </output>
    </test>
    <!--
      test #7
      Uses the issue1 input files with no univariate filtering, factor 'tissue_flowering'
      and wildcard '*' for the levels.  Checks the corcov output and the salience output,
      including the 'mz' and 'rt' column labels of the latter.
    -->
    <test>
      <param name="dataMatrix_in" value="issue1_input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="issue1_input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="issue1_input_variableMetadata.tsv"/>
      <param name="tesC" value="none"/>
      <param name="facC" value="tissue_flowering"/>
      <param name="labelFeatures" value="3"/>
      <param name="min_crossval_i" value="7"/>
      <param name="fdr_features" value="ALL"/>
      <param name="levCSV" value="*"/>
      <param name="matchingC" value="wildcard"/>
      <output name="contrast_corcov">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="factorLevel1" />
          <has_text text="factorLevel2" />
          <has_text text="correlation" />
          <has_text text="covariance" />
          <has_text text="vip4p" />
          <has_text text="vip4o" />
          <!-- first matched line -->
          <has_text text="NM516T251" />
          <has_text text="flower_yes" />
          <has_text text="other" />
          <has_text text="0.3499550705" />
          <has_text text="11.609255" />
          <has_text text="0.43664386" />
          <has_text text="0.587701897" />
          <has_text text="0.026082688" />
          <has_text text="0.0437742145" />
          <has_text text="516.0845" />
          <has_text text="250.8762" />
        </assert_contents>
      </output>
      <output name="contrast_salience">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="salientLevel" />
          <has_text text="salienceRCV" />
          <has_text text="salience" />
          <has_text text="mz" />
          <has_text text="rt" />
          <!-- three matched lines, one per row of assertions below -->
          <has_text text="NM517T428" /><has_text text="flower_yes" /><has_text text="0.02765631" /><has_text text="7.8343993" /><has_text text="1.27813793" /><has_text text="517.121714" /><has_text text="428.306854248" />
          <has_text text="NM517T426_1" /><has_text text="0.0290065" /><has_text text="7.7151305" /><has_text text="1.2758886" /><has_text text="517.09367125" /><has_text text="426.233886719" />
          <has_text text="NM516T284_2" /><has_text text="0.022883902" /><has_text text="7.6379724" /><has_text text="1.25603610" /><has_text text="516.082005177" /><has_text text="283.569198608" />
        </assert_contents>
      </output>
    </test>
    <!--
      test #8
      Uses the issue6 sample metadata with factor 'k._10' and the literal level names
      'k1' and 'k.2' (matchingC 'none'), without univariate filtering.  Only the corcov
      output is checked.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="issue6_input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="tesC" value="none"/>
      <param name="facC" value="k._10"/>
      <param name="labelFeatures" value="3"/>
      <param name="min_crossval_i" value="7"/>
      <param name="fdr_features" value="ALL"/>
      <param name="levCSV" value="k1,k.2"/>
      <param name="matchingC" value="none"/>
      <output name="contrast_corcov">
        <assert_contents>
          <!-- column-labels line -->
          <has_text text="featureID" />
          <has_text text="factorLevel1" />
          <has_text text="factorLevel2" />
          <has_text text="correlation" />
          <has_text text="covariance" />
          <has_text text="vip4p" />
          <has_text text="vip4o" />
          <!-- first matched line -->
          <has_text text="M349.2383T700" />
          <has_text text="0.61594030" />
          <has_text text="3489481837.9" />
          <has_text text="0.54672558" />
          <has_text text="0.3920409" />
          <!-- second matched line -->
          <has_text text="M207.9308T206" />
          <has_text text="-0.89716403" />
          <has_text text="-585563327.7" />
          <has_text text="0.270297" />
          <has_text text="0.037661" />
        </assert_contents>
      </output>
    </test>
  </tests>
  <help><![CDATA[

**Run OPLS-DA Contrasts of Univariate Results**
-----------------------------------------------

**Author** - Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu)

**Release Notes** - https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper#release-notes

Motivation
----------

OPLS-DA and the SIMCA S-PLOT (Wiklund *et al.*, 2008) may be employed to draw attention to metabolomic features that are potential biomarkers, i.e. features that are potentially useful when assigning a sample to one of two classes (e.g. Sun *et al.*, 2016).  Workflow4Metabolomics (W4M, Giacomoni *et al.*, 2014, Guitton *et al.*, 2017) provides a suite of tools for preprocessing and statistical analysis of LC-MS, GC-MS, and NMR metabolomics data; however, it does not (as of release 3.2) include a tool for making the equivalent of an S-PLOT.

The S-PLOT is computed from mean-centered, pareto-scaled data.  This plot presents the correlation of the first score vector from an OPLS-DA model with the sample-variables used to produce that model versus the covariance of the scores with the sample-variables.  For OPLS-DA, the first score vector represents the variation among the sample-variables that is related to the predictor (i.e., the contrasting factor); the second score vector, variation that is orthogonal to the predictor.

The primary aims of this tool are:

- To compute and visualize multiple contrasts with OPLS-DA and the covariance vs. correlation plot.
- To write the results to data files for use in further multivariate analysis or visualization.

Note: This tool only supports categorical factors with non-numeric level-names.

Description
-----------

The purpose of the 'OPLS-DA Contrasts' tool is to visualize GC-MS or LC-MS features that are possible biomarkers.

The W4M 'Univariate' tool (Th]]>&#233;<![CDATA[venot *et al.*, 2015) adds the results of family-wise corrected pairwise significance-tests as columns of the **variableMetadata** dataset.
For instance, if Kruskal-Wallis testing were performed on a column named 'cluster' in sampleMetadata that has values 'k1' and 'k2' and at least one other value:

- A column of variableMetadata would be labelled 'cluster_kruskal_sig' and would have values '1' and '0'; when the samples are grouped by 'cluster', '1' means that there is strong evidence against the hypothesis that there is no difference among the intensities for the feature across all sample-groups.
- A column of variableMetadata would be labelled 'cluster_kruskal_k1.k2_sig' and would have values '1' and '0', where '1' means that there is significant evidence against the hypothesis that samples from sampleMetadata whose 'cluster' column contains 'k1' or 'k2' have the same intensity for that feature.

The 'OPLS-DA Contrasts' tool produces graphics and data for OPLS-DA contrasts of feature-intensities between significantly different pairs of factor-levels.  For each factor-level, the tool performs a contrast with all other factor-levels combined and then separately with each other factor-level.

**Along the left-to-right axis, the plots show the supervised projection of the variation explained by the predictor** (i.e., the factor specified when invoking the tool); **the top-to-bottom axis displays the variation that is orthogonal to the predictor level** (i.e., independent of it).

Although this tool can be used in a purely exploratory manner by supplying the variableMetadata file without the columns added by the W4M 'Univariate' tool, **a preferable workflow may be to use univariate testing to exclude features that are not significantly different and then to use OPLS-DA to visualize the differences identified in univariate testing** (Th]]>&#233;<![CDATA[venot *et al.*, 2015); an appropriate exception would be to visualize contrasts of a specific list of metabolites.  If you do exclude features, however, make sure that you set the advanced parameter "How many features for p-value calculation?" accordingly.

It must be stressed that there may be no *single* definitive computational approach to select features that are reliable biomarkers, especially from a small number of samples or experiments.  A few possible choices are:

- picking features with maximum loadings along the projection parallel to the predictor (loadp),
- examining extreme values on S-PLOTs,
- examining "variable importance in projection VIP for OPLS-DA" (Galindo-Prieto *et al.* 2014), and
- examining a feature's "selectivity ratio" (Rajalahti *et al.*, 2009).

In this spirit, this tool reports the S-PLOT covariance and correlation (Wiklund *op. cit.*) and VIP metrics, and it introduces an informal "salience" metric to flag features that may merit attention without dimensional reduction; future versions may add selectivity ratio.

For a more systematic approach to biomarker identification, please consider the W4M 'biosigner' tool (Rinaudo *et al.* 2016), which applies three different identification metrics to the selection process.  Regardless of how any potential biomarker is identified, further validation analysis (e.g., independent confirmatory experiments) is needed before it can be recommended for general application.


W4M Workflow Position
---------------------

- Upstream tool: **Univariate** (category: Statistical Analysis) or any **Preprocessing** tool that produces or updates a 'variableMetadata' file.
- Downstream tool categories: **Statistical Analysis**

Input files
-----------

  +----------------------+-----------+
  | File                 |  Format   |
  +======================+===========+
  | Data matrix          |  tabular  |
  +----------------------+-----------+
  | Sample metadata      |  tabular  |
  +----------------------+-----------+
  | Variable metadata    |  tabular  |
  +----------------------+-----------+

Output files
------------

  +-------------------------------------------+-----------+
  | File                                      |  Format   |
  +===========================================+===========+
  | Contrast detail                           |    pdf    |
  +-------------------------------------------+-----------+
  | Contrast "correlation & covariance" data  |  tabular  |
  +-------------------------------------------+-----------+
  | Feature "salience" data                   |  tabular  |
  +-------------------------------------------+-----------+

Parameters
----------

[IN] Data matrix file
  | variable x sample **dataMatrix** (tabular separated values) file of the numeric data matrix, with '.' as decimal, and 'NA' for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the sample and variable metadata, respectively (see below)
  |

[IN] Sample metadata file
  | sample x metadata **sampleMetadata** (tabular separated values) file of the numeric and/or character sample metadata, with '.' as decimal and 'NA' for missing values
  |

[IN] Variable metadata file
  | variable x metadata **variableMetadata** (tabular separated values) file of the numeric and/or character variable metadata, with '.' as decimal and 'NA' for missing values
  |

[IN] Test
  | Name of the **statistical test** - a component of column names in variable metadata table
  | May be one of 'none', 'ttest', 'wilcoxon', 'anova', 'kruskal', 'pearson', 'spearman'
  |

[IN] Factor of interest
  | Name of the **column of sampleMetadata** corresponding to the qualitative or quantitative variable
  |

[IN] Retain only pairwise-significant features
  | *Note that when 'Test' is 'none', all features are included in the analysis and this parameter is not settable.*
  | When **true**, for each contrast of two levels, include only those features which pass the significance threshold for that contrast.  Choosing true results in an OPLS-DA model that better reflects and visualizes the difference detected by univariate analysis, with somewhat increased reliability of prediction (as assessed by cross-validation).
  | When **false**, include all features that pass the significance threshold when testing for difference across all factor-levels.  This choice produces a plot that displays more features but is not necessarily more informative.
  |

[IN] Levels of interest
  | Comma-separated **level-names** (or comma-less regular expressions to match level-names) to consider in analysis; must match at least two levels; may include wild cards or regular expressions.
  |

[IN] Level-name matching
  | Indicator of **how levels are to be specified generically** (if at all) - wild cards, regular expressions, or none (no generic matching).
  |

[IN] Label how many extreme features
  | Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features; this choice has no effect on the OPLS-DA loadings plot.
  |

[IN] (Advanced) C-plot Y-axis
  | Choose whether C-plots should plot the correlation (the default) or the covariance *vs.* VIP.
  |

[IN] (Advanced) Produce predictor C-plot
  | Choose whether a C-plot should be produced for the projections parallel to the predictor.
  |

[IN] (Advanced) Produce orthogonal C-plot
  | Choose whether a C-plot should be produced for the projections orthogonal to the predictor.
  |

[IN] (Advanced) How many features for p-value calculation?
  | You will need to use this option when statistical criteria have previously been applied to remove features in the data matrix.  This is important for adjusting the p-values for correlation of the scores with each feature; this adjustment is necessary to avoid underestimation of the p-values.  If this is applicable, specify the sum of the number of features removed and the number of features in the data matrix.
  |

[OUT] Contrast-detail output PDF
  | File containing several plots for each two-projection OPLS-DA analysis.

- (first row, left) **correlation-versus-covariance plot** of OPLS-DA results

    - This is a work-alike for the S-PLOT described in Wiklund, (*op. cit.*), ignoring samples with missing values;
    - point-color becomes saturated as the "variable importance in projection to the predictive components" (VIP\ :subscript:`4,p` from Galindo-Prieto *op. cit.*) ranges from 0.83 to 1.21 (Mehmood *et al.* 2012), for use to identify features for consideration as biomarkers;
    - plot symbols are diamonds when the p-value of the correlation, adjusted for family-wise error rate (Yekutieli *et al.*, 2001), is greater than 0.05, circles when it is less than 0.01, and triangles when between 0.01 and 0.05.
- (second row, left) **model-overview plot** for the two projections; grey bars are the correlation coefficient for the fitted data; black bars indicate performance in cross-validation tests (Th]]>&#233;<![CDATA[venot, 2017)
- (first row, right) OPLS-DA **scores-plot** for the two projections (Th]]>&#233;<![CDATA[venot *et al.*, 2015)
- (second row, right) **correlation-versus-covariance plot** of OPLS-DA results **orthogonal to the predictor** (see section "S-Plot of Orthogonal Component" in Wiklund, *op. cit.*, pp. 120-121; this characterizes variation of features that is *independent of the predictor*).
- (third row, left, when "**predictor C-plot**" is chosen under "Advanced") plot of the correlation (or covariance) vs. the VIP\ :subscript:`4,p` (Galindo-Prieto *op. cit.*), to assist in identifying features for consideration as biomarkers.
- (third row, right, when "**orthogonal C-plot**" is chosen under "Advanced") plot of the correlation (or covariance) vs. the VIP\ :subscript:`4,o` (*ibid.*), to assist in identifying features varying considerably without regard to the predictor.

[OUT] Contrast Correlation-Covariance data TABULAR
  | A tab-separated values file of metadata for each feature for each contrast in which it was included.
  | Thus, a given feature may appear many times, but *the combination of featureID, factorLevel1, and factorLevel2 will be unique.*
  | This file has the following columns:

- **featureID** - feature identifier
- **factorLevel1** - factor-level 1
- **factorLevel2** - factor-level 2 (or "other" when contrasting factor-level 1 with all other levels)
- **correlation**\ (t\ :subscript:`p`,X\ :subscript:`i`) - for this feature (i), correlation of sample intensities for this feature (X\ :subscript:`i`) with the OPLS-DA projection's first set of scores (t\ :subscript:`p`, i.e., the scores explaining the difference between the features), computed (omitting samples missing values) using the R *stats::cor* function with the 'pearson' method (R Core Team, 2018); this is negative when intensity for level 1 is greater than for level 2
- **covariance**\ (t\ :subscript:`p`,X\ :subscript:`i`) - computed as for correlation but using the R *stats::cov* function (*ibid.*) in lieu of *stats::cor*; this is also negative when intensity for level 1 is greater than for level 2
- **vip4p** - "variable importance in projection" to the predictive projection, VIP\ :subscript:`4,p` (Galindo-Prieto *op. cit.*)
- **vip4o** - "variable importance in projection" to the orthogonal projection, VIP\ :subscript:`4,o` (*ibid.*)
- **loadp** - variable loading for the predictive projection (Wiklund *op. cit.*)
- **loado** - variable loading for the orthogonal projection (*ibid.*)
- **cor_p_val_raw** - p-value for Fisher-transformed correlation (Fisher, 1921; Snedecor, 1980; see also https://en.wikipedia.org/wiki/Fisher_transformation), with no family-wise error-rate correction.
- **cor_p_value** - p-value for Fisher-transformed correlation, adjusted for family-wise error rate (Yekutieli *et al.*, 2001).
- **cor_ci_lower** - lower limit of 95% confidence interval for correlation, based on cor_p_value
- **cor_ci_upper** - upper limit of 95% confidence interval for correlation, based on cor_p_value
- **mz** - *m/z* ratio for feature, copied from input variableMetadata
- **rt** - retention time for feature, copied from input variableMetadata
- **level1Level2Sig** (Only present when a test other than "none" is chosen) - '1' when feature varies significantly across all classes (i.e., not pair-wise); '0' otherwise

[OUT] Feature "Salience" data TABULAR
  | Metrics for the "salient level" for each feature, i.e., the level at which the feature is more prominent than any other level.  This is *not* at all related to the SIMCA OPLS-DA S-PLOT; rather, it is intended as a potential way to discover features for consideration as potential biomarkers without dimensionally reducing the data.  This is a tab-separated values file having the following columns:

- **featureID** - feature identifier
- **salientLevel** - salient level, i.e., for the feature, the class-level having the greatest median intensity
- **salienceRCV** - salience robust coefficient of variation, i.e., for the feature, the mean absolute deviation of the intensity for the salient level divided by the median intensity for the salient level
- **relativeSalientDistance** - relative salient distance, i.e., for the feature, the distance between the two highest class-level medians divided by the square root of the mean absolute deviations of those two class-levels' intensities
- **salience** - salience, i.e., for the feature, the median of the class-level having the greatest intensity divided by the mean of the medians for all class-levels
- **mz** - *m/z* ratio for feature, copied from input variableMetadata
- **rt** - retention time for feature, copied from input variableMetadata

Wild card patterns to match level-names
---------------------------------------

"wild card" patterns may be used to select level-names.

- use '``?``' to match a single character
- use '``*``' to match zero or more characters
- the entire pattern must match the level name

For example

- '``??.le*``' matches '``my.level``' but not '``my.own.level``'
- '``*.level``' matches '``my.level``' and '``my.own.level``'
- '``*.leve``' matches neither '``my.level``' nor '``my.own.level``'

Regular expression patterns to match level-names
------------------------------------------------

"regular expression" patterns may be used to select level-names.

POSIX 1003.2 standard regular expressions allow precise pattern-matching and are exhaustively defined at:
http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html

However, only a few basic building blocks of regular expressions need to be mastered for most cases:

- '``^``' matches the beginning of a level-name
- '``$``' matches the end of a level-name
- '``.``' outside of square brackets matches a single character
- '``*``' matches the character specified immediately before it, zero or more times
- Square brackets specify a set of characters to be matched.  Within square brackets:

  - '``^``' as the first character specifies that the list of characters are those that should **not** be matched.
  - '``-``' is used to specify ranges of characters

Caveat: The tool wrapper uses the comma ('``,``') to split a list of sample-level names, so **commas may not be used within regular expressions for this tool.**

First Example: Consider a field of level-names consisting of '``marq3,marq6,marq9,marq12,front3,front6,front9,front12``'

- The regular expression '``^front[0-9][0-9]*$``' will match the same sample-levels as '``front3,front6,front9,front12``'
- The regular expression '``^[a-z][a-z]*3$``' will match the same sample-levels as '``front3,marq3``'
- The regular expression '``^[a-z][a-z]*12$``' will match the same sample-levels as '``front12,marq12``'
- The regular expression '``^[a-z][a-z]*[0-9]$``' will match the same sample-levels as '``front3,front6,front9,marq3,marq6,marq9``'

Second Example: Consider these regular expression patterns as possible matches to a sample-level name '``AB0123``':

- '``^[A-Z][A-Z][0-9][0-9]*$``' - MATCHES '``**^AB0123$**``'
- '``^[A-Z][A-Z]*[0-9][0-9]*$``' - MATCHES '``**^AB0123$**``'
- '``^[A-Z][0-9]*``' - MATCHES  '``**^A** B0123$``' - first character is a letter, '``*``' can specify zero characters, and end of line did not need to be matched.
- '``^[A-Z][A-Z][0-9]``' - MATCHES  '``**^AB0** 123$``' - first two characters are letters and the third is a digit.
- '``^[A-Z][A-Z]*[0-9][0-9]$``' - NO MATCH - the name does not end with the pattern '``[A-Z][0-9][0-9]$``', i.e., it ends with four digits, not two.
- '``^[A-Z][0-9]*$``' - NO MATCH - the pattern specifies that second character and all those that follow, if present, must be digits.

Working examples
----------------

**Input files**

  +-----------------------------------------------------------------------------------------------------------------------------------------------+
  | Download from URL                                                                                                                             |
  +===============================================================================================================================================+
  | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/input_dataMatrix.tsv                   |
  +-----------------------------------------------------------------------------------------------------------------------------------------------+
  | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/input_sampleMetadata.tsv               |
  +-----------------------------------------------------------------------------------------------------------------------------------------------+
  | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/input_variableMetadata.tsv             |
  +-----------------------------------------------------------------------------------------------------------------------------------------------+

**Example 1:** Include in the analysis only features identified as pair-wise significant in the Univariate test.

  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
  | Input Parameter or Result                  | Value                                                                                                                                  |
  +============================================+========================================================================================================================================+
  | Factor of interest                         | k10                                                                                                                                    |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
  | Univariate Significance-Test               | kruskal                                                                                                                                |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
  | Retain only pairwise-significant features  | Yes                                                                                                                                    |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
  | Levels of interest                         | k[12],k[3-4]                                                                                                                           |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
  | Level-name matching                        | use regular expressions for matching level-names                                                                                       |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
  | Number of features having extreme loadings | ALL                                                                                                                                    |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
  | How many features for p-value calculation? | 250                                                                                                                                    |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
  | Output primary table                       | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_corcov.tsv    |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
  | Output salience table                      | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_salience.tsv  |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
  | Output figures PDF                         | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_detail.pdf    |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+

**Example 2:** Include in the analysis only features identified as overall-significant in the Univariate test.  Note that this even includes these features in contrasts where they were not determined to be pair-wise significant in the Univariate test.  Thus, more features are included than in Example 1.

  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+
  | Input Parameter or Result                  | Value                                                                                                                                      |
  +============================================+============================================================================================================================================+
  | Factor of interest                         | k10                                                                                                                                        |
  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+
  | Univariate Significance-Test               | kruskal                                                                                                                                    |
  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+
  | Retain only pairwise-significant features  | No                                                                                                                                         |
  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+
  | Levels of interest                         | ``*``                                                                                                                                      |
  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+
  | Level-name matching                        | use wild cards for matching level-names                                                                                                    |
  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+
  | Number of features having extreme loadings | 5                                                                                                                                          |
  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+
  | How many features for p-value calculation? | ALL                                                                                                                                        |
  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+
  | Output primary table                       | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_corcov_all.tsv    |
  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+
  | Output salience table                      | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_salience_all.tsv  |
  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+
  | Output figures PDF                         | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_detail_all.pdf    |
  +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+

**Example 3:** Include all features in the analysis without regard to Univariate testing.  Univariate testing is not even a pre-requisite to using the tool when 'none' is selected for the test.  Thus, more features are included than in Example 2.

  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Input Parameter or Result                  | Value                                                                                                                                        |
  +============================================+==============================================================================================================================================+
  | Factor of interest                         | k10                                                                                                                                          |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Univariate Significance-Test               | none                                                                                                                                         |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Levels of interest                         | k[12],k[3-4]                                                                                                                                 |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Level-name matching                        | use regular expressions for matching level-names                                                                                             |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Number of features having extreme loadings | 0                                                                                                                                            |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | How many features for p-value calculation? | ALL                                                                                                                                          |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Output primary table                       | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_corcov_global.tsv   |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Output salience table                      | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_salience_global.tsv |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Output figures PDF                         | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_detail_global.pdf   |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+

**Example 4:** Analysis of a two-level factor (including all features).  This suppresses the contrasts of "each factor vs. the aggregate of all the others".

  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Input Parameter or Result                  | Value                                                                                                                                        |
  +============================================+==============================================================================================================================================+
  | Factor of interest                         | lohi                                                                                                                                         |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Univariate Significance-Test               | none                                                                                                                                         |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Levels of interest                         | low,high                                                                                                                                     |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Level-name matching                        | use regular expressions for matching level-names                                                                                             |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Number of features having extreme loadings | 3                                                                                                                                            |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | How many features for p-value calculation? | ALL                                                                                                                                          |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Output primary table                       | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_corcov_lohi.tsv     |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Output salience table                      | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_salience_lohi.tsv   |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+
  | Output figures PDF                         | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_detail_lohi.pdf     |
  +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+


Trademarks
----------

OPLS-DA, SIMCA, and S-PLOT are registered trademarks of the Umetrics company.  http://umetrics.com/about-us/trademarks


  ]]></help>
  <citations>
    <!--
      References reported by Galaxy for this tool.  Each entry is preceded by
      a comment giving a short Author_Year label and topic; entries are either
      a DOI or an inline BibTeX record.
    -->
    <!-- this tool -->
    <citation type="doi">10.5281/zenodo.1034784</citation>
    <!-- R project -->
    <citation type="bibtex"><![CDATA[
    @Manual{,
      title = {R: A Language and Environment for Statistical Computing},
      author = {{R Core Team}},
      organization = {R Foundation for Statistical Computing},
      address = {Vienna, Austria},
      year = {2018},
      url = {https://www.R-project.org/},
    }
    ]]></citation>
    <!-- Fisher_1921: Fisher z-transformation of correlation coefficient -->
    <citation type="bibtex"><![CDATA[
    @article{Fisher_1921,
      author = {Fisher, R. A.},
      title = {{On the probable error of a coefficient of correlation deduced from a small sample}},
      journal = {Metron},
      year = {1921},
      volume = {1},
      pages = {3--32},
      note = {Defines the Fisher z-transformation of a coefficient of correlation.  Citation adapted from http://www.citeulike.org/group/894/article/2344770},
      url = {https://digital.library.adelaide.edu.au/dspace/bitstream/2440/15169/1/14.pdf},
    }
    ]]></citation>
    <!-- Galindo_Prieto_2014 Variable influence on projection (VIP) for OPLS -->
    <citation type="doi">10.1002/cem.2627</citation>
    <!-- Giacomoni_2014 W4M 2.5 -->
    <citation type="doi">10.1093/bioinformatics/btu813</citation>
    <!-- Guitton_2017 W4M 3.0 -->
    <citation type="doi">10.1016/j.biocel.2017.07.002</citation>
    <!-- Mehmood_2012 PLS-based variable-selection -->
    <!-- NOTE(review): this DOI appears to belong to a 2011 article (Algorithms for Molecular Biology 6:27); confirm the label year or the DOI -->
    <citation type="doi">10.1186/1748-7188-6-27</citation>
    <!-- Rajalahti_2009 Biomarker discovery using selectivity ratio -->
    <citation type="doi">10.1016/j.chemolab.2008.08.004</citation>
    <!-- Rinaudo_2016 biosigner: molecular signatures from omics data -->
    <citation type="doi">10.3389/fmolb.2016.00026</citation>
    <!-- Sun_2016 Urinary Biomarkers for adolescent idiopathic scoliosis -->
    <citation type="doi">10.1038/srep22274</citation>
    <!-- Snedecor_1980: Fisher z-transformation of correlation coefficient -->
    <citation type="bibtex"><![CDATA[
    @book{Snedecor_1980,
      author = {Snedecor, George W. and Cochran, William G.},
      title = {Statistical methods},
      publisher = {Iowa State University Press},
      year = {1980},
      pages = {186},
      isbn = {0813815606},
      language = {eng},
      keyword = {Statistics, Statistics as Topic -- methods},
      lccn = {80014582},
      edition = {7th},
      address = {Ames, Iowa},
    }
    ]]></citation>
    <!-- Thevenot_2015 Urinary metabolome statistics -->
    <citation type="doi">10.1021/acs.jproteome.5b00354</citation>
    <!-- ropls package -->
    <citation type="bibtex"><![CDATA[
    @incollection{Thevenot_ropls_2017,
      author = {Th{\'{e}}venot, Etienne A.},
      title = {ropls: PCA, PLS(-DA) and OPLS(-DA) for multivariate analysis and feature selection of omics data},
      publisher = {bioconductor.org},
      year = {2017},
      doi = {10.18129/B9.bioc.ropls},
      booktitle = {Bioconductor: Open source software for bioinformatics},
      address = {Roswell Park Cancer Institute},
    }
    ]]></citation>
    <!-- Wiklund_2008 OPLS-DA and S-PLOT -->
    <citation type="doi">10.1021/ac0713510</citation>
    <!-- Yekutieli_2001 The control of the false discovery rate in multiple testing under dependency -->
    <citation type="doi">10.1214/aos/1013699998</citation>
  </citations>
  <!--
     vim:et:sw=4:ts=4
-->
</tool>