view mixmodel.xml @ 0:a4d89d47646f draft default tip

planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 8d2ca678d973501b60479a8dc3f212eecd56eab8
author workflow4metabolomics
date Mon, 16 May 2022 09:25:01 +0000
parents
children
line wrap: on
line source

<tool id="mixmodel4repeated_measures" name="mixmodel" version="3.1.0" profile="19.01">
    <description>ANOVA for repeated measures statistics</description>

    <!-- R / Bioconductor packages made available to the job; every version is pinned explicitly. -->
    <requirements>
        <requirement type="package" version="1.1_27">r-lme4</requirement>
        <requirement type="package" version="3.1_3">r-lmertest</requirement>
        <requirement type="package" version="3.2_0">r-ggplot2</requirement>
        <!-- NOTE(review): conda package names are conventionally lowercase ("r-gridextra"); confirm this mixed-case name resolves as intended. -->
        <requirement type="package" version="2.3">r-gridExtra</requirement>
        <requirement type="package" version="2.40.0">bioconductor-multtest</requirement>
    </requirements>

    <!--
        Command template (Cheetah): calls the R wrapper shipped in the tool directory and
        passes every setting as a "name 'value'" pair. The decplaces pair is only emitted
        when rounding has been switched on in the roundchoice conditional.
    -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '${__tool_directory__}/mixmodel_wrapper.R'

        dataMatrix_in '${dataMatrix_in}'
        sampleMetadata_in '${sampleMetadata_in}'
        variableMetadata_in '${variableMetadata_in}'

        fixfact '${fixfact}'
        time '${time}'
        subject '${subject}'
        adjC '${adjC}'
        trf '${trf}'
        thrN '${thrN}'
        diaR '${diaR}'
        dff '${dff}'
        rounding '${roundchoice.rounding}'
        #if str($roundchoice.rounding) == 'yes'
            decplaces '${roundchoice.decplaces}'
        #end if

        variableMetadata_out '${variableMetadata_out}'
        out_graph_pdf '${out_graph_pdf}'
        out_estim_pdf '${out_estim_pdf}'
        information '${information}'

    ]]></command>

    <!--
        Tool form.
        The three tabular files come first, then the names of the sample metadata columns
        used as factors, then the statistical options. Numeric parameters carry explicit
        bounds so that out-of-range values are rejected by the form instead of being
        passed on to the R script.
    -->
    <inputs>
        <!-- Input tables -->
        <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: '.', missing: NA, mode: numerical, sep: tabular" />
        <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata, decimal: '.', missing: NA, mode: character and numerical, sep: tabular" />
        <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata, decimal: '.', missing: NA, mode: character and numerical, sep: tabular" />

        <!-- Column names (in the sample metadata table) describing the design -->
        <param name="fixfact" label="Fixed Factor of interest" type="text" help="Name of the column of the sample metadata table corresponding to the fixed factor (use none if there is only a time factor)"/>
        <param name="time" label="Repeated factor (time)" type="text" help="Name of the column of the sample metadata table corresponding to the repeated factor"/>
        <param name="subject" label="Subject factor" type="text" help="Name of the column of the sample metadata table corresponding to the subject factor"/>

        <!-- Statistical options; the first option of each select is the default -->
        <param name="dff" label="Degrees of freedom method for the F-tests" type="select" help="">
            <option value="Satt">Satterthwaite</option>
            <option value="KenR">Kenward-Roger</option>
        </param>
        <param name="adjC" label="Method for multiple testing correction" type="select" help="">
            <option value="fdr">fdr</option>
            <option value="BH">BH</option>
            <option value="bonferroni">bonferroni</option>
            <option value="BY">BY</option>
            <option value="hochberg">hochberg</option>
            <option value="holm">holm</option>
            <option value="hommel">hommel</option>
            <option value="none">none</option>
        </param>
        <param name="trf" label="Log transform of raw data" type="select" help="Transformation of raw data">
            <option value="none">none</option>
            <option value="log10">log10</option>
            <option value="log2">log2</option>
        </param>
        <!-- Bounds enforce what the help text already states -->
        <param name="thrN" type="float" value="0.05" min="0" max="1" label="(Corrected) p-value significance threshold" help="Must be between 0 and 1"/>
        <param name="diaR" label="Perform diagnostic of the residuals" type="select" help="Used to assess the quality of models considering distribution of residuals">
            <option value="yes">yes</option>
            <option value="no">no</option>
        </param>

        <!-- Optional rounding of the numerical result columns; decplaces only exists when rounding is "yes" -->
        <conditional name="roundchoice">
            <param name="rounding" type="select" label="Rounding the result's numerical columns" help="Should the numerical values generated by this tool be rounded to a chosen number of decimal places?">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="yes">
                <param argument="decplaces" type="integer" value="6" min="0" label="Number of decimal places for rounding" help="Positive integer" />
            </when>
            <when value="no">
            </when>
        </conditional>
    </inputs>

    <!--
        Datasets produced by a run. Labels are built from the tool name; the variable
        metadata label additionally carries the name of the input variable metadata dataset.
    -->
    <outputs>
        <!-- Always created -->
        <data name="variableMetadata_out" format="tabular" label="${tool.name}_${variableMetadata_in.name}"/>
        <data name="information" format="txt" label="${tool.name}_information.txt"/>
        <data name="out_estim_pdf" format="pdf" label="${tool.name}_Estimates"/>
        <!-- Only created when the residuals diagnostic has been requested -->
        <data name="out_graph_pdf" format="pdf" label="${tool.name}_diagResiduals">
            <filter>diaR == 'yes'</filter>
        </data>
    </outputs>

    <tests>
        <!--
            Two-factor design (fixed factor "phenotype", repeated factor "time", subject "sujet"),
            log10 transform, no multiple testing correction, results rounded to 6 decimals.
            diaR is set explicitly to "yes" so that the residuals PDF is produced, which is
            what makes the expected number of outputs 4.
        -->
        <test expect_num_outputs="4">
            <param name="dataMatrix_in" value="TwoFactor_dataMatrix.txt"/>
            <param name="sampleMetadata_in" value="TwoFactor_sampleMetadata.txt"/>
            <param name="variableMetadata_in" value="TwoFactor_variableMetadata.txt"/>
            <param name="fixfact" value="phenotype"/>
            <param name="time" value="time"/>
            <param name="subject" value="sujet"/>
            <param name="adjC" value="none"/>
            <param name="trf" value="log10"/>
            <param name="thrN" value="0.01"/>
            <param name="diaR" value="yes"/>
            <param name="dff" value="Satt"/>
            <!-- Parameters of a conditional are addressed through their enclosing conditional -->
            <conditional name="roundchoice">
                <param name="rounding" value="yes"/>
                <param name="decplaces" value="6"/>
            </conditional>
            <output name="variableMetadata_out" value="mixmodel_TwoFactor_variableMetadata.txt"/>
        </test>
    </tests>

    <help><![CDATA[
.. class:: infomark

**Tool update: See the 'NEWS' section at the bottom of the page**

.. class:: infomark

**Authors** Natacha Lenuzza and Jean-Francois Martin wrote this wrapper of R repeated-measures ANOVA statistical tests.
**Maintainers** Melanie Petera and Marie Tremblay-Franco (INRAE-MetaboHUB)

MetaboHUB: The French National Infrastructure for Metabolomics and Fluxomics (https://www.metabohub.fr/home.html)

.. class:: infomark

**Please cite**

R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org

.. class:: infomark

**References**

Kuznetsova A., Brockhoff PB. and Christensen RHB. (2017). lmerTest Package: Tests in Linear Mixed Effects Models. Journal of Statistical Software, 82(13), pp. 1–26. doi: 10.18637/jss.v082.i13.

Benjamini Y. and Hochberg Y. (1995). Controlling the false discovery rate: a practical and powerful approach to multiple testing. Journal of the Royal Statistical Society. Series B (Methodological), 57:289-300.


=============
Mixed models
=============

-----------
Description
-----------

The module performs analysis of variance for repeated measures using a mixed model.


-----------
Input files
-----------

+---------------------------+------------+
| File                      |   Format   |
+===========================+============+
| 1 : Data matrix           |   tabular  |
+---------------------------+------------+
| 2 : Sample metadata       |   tabular  |
+---------------------------+------------+
| 3 : Variable metadata     |   tabular  |
+---------------------------+------------+


----------
Parameters
----------

Data matrix file
| variable x sample **dataMatrix** tabular separated file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the sample and variable metadata, respectively (see below)
|

Sample metadata file
| sample x metadata **sampleMetadata** tabular separated file of the numeric and/or character sample metadata, with . as decimal and NA for missing values
|

Variable metadata file
| variable x metadata **variableMetadata** tabular separated file of the numeric and/or character variable metadata, with . as decimal and NA for missing values
|


Treatment
| Name of the fixed factor in the sample metadata file. Use "none" if you have only a time factor
|

Time
| Name of the repeated (time) factor in the sample metadata file
|

Subject
| Name of the subject factor (on which the repeated measurement is done) in the sample metadata file
|

Degrees of freedom
| Method used to compute the degrees of freedom of the F-tests: Satterthwaite or Kenward-Roger
|

Method for multiple testing correction
| The 7 methods implemented in the 'p.adjust' R function are available and documented as follows:
| "The adjustment methods include the Bonferroni correction ("bonferroni") in which the p-values are multiplied by the number of comparisons. Less conservative corrections are also included by Holm (1979) ("holm"), Hochberg (1988) ("hochberg"), Hommel (1988) ("hommel"), Benjamini and Hochberg (1995) ("BH" or its alias "fdr"), and Benjamini and Yekutieli (2001) ("BY"), respectively. A pass-through option ("none") is also included. The set of methods are contained in the p.adjust.methods vector for the benefit of methods that need to have the method as an option and pass it on to p.adjust. The first four methods are designed to give strong control of the family-wise error rate. There seems no reason to use the unmodified Bonferroni correction because it is dominated by Holm's method, which is also valid under arbitrary assumptions. Hochberg's and Hommel's methods are valid when the hypothesis tests are independent or when they are non-negatively associated (Sarkar, 1998; Sarkar and Chang, 1997). Hommel's method is more powerful than Hochberg's, but the difference is usually small and the Hochberg p-values are faster to compute. The "BH" (aka "fdr") and "BY" method of Benjamini, Hochberg, and Yekutieli control the false discovery rate, the expected proportion of false discoveries amongst the rejected hypotheses. The false discovery rate is a less stringent condition than the family-wise error rate, so these methods are more powerful than the others."


(Corrected) p-value significance threshold
| Significance threshold applied to the (corrected) p-values; must be between 0 and 1
|

Rounding the result's numerical columns
| This parameter enables the choice of a given number of decimal places to be used to display results' values in the output columns of the variableMetadata that are generated by the tool.
| If set to "Yes", you can indicate the number of decimal places wanted, and all the results' values will be rounded according to this number.
|

------------
Output files
------------

variableMetadata_out.tabular
| **variableMetadata** file identical to the file given as argument plus
| pvalue of Shapiro normality test of the residuals
| pvalues of the main effects and interaction
| PostHoc test with differences between levels and pvalues of these differences
|

mixmodel_Estimates
| A pdf file is created with a graphical representation of differences among levels of factors with a color code for significance and an error bar.
| Plots are only provided for features with at least one significant factor. 


mixmodel_diagResiduals
| If "Perform diagnostic of the residuals" is set to yes (default), a pdf file is created with a series of
| graphics produced in order to assess the distribution of residuals to check the adjustment.
| Plots are only provided for features with at least one factor being significant based on the non-corrected p-values. 

information.txt
| File with all messages and warnings generated during the computation
| The list of variables with name and a tag if it is significant for at least one of the fixed or repeated factors.



---------------------------------------------------

Changelog/News
--------------

**Version 3.1.0 - April 2022**

- NEW FEATURE: the tool can now be used without a fixed factor by specifying "none" in the concerned parameter field.

- NEW FEATURE: addition of the possibility to choose the degrees of freedom computation method (previously using only the Satterthwaite method).

- NEW FEATURE: addition of the possibility to choose the number of decimal places in the variable-metadata numerical columns' output.


    ]]></help>

    <!--
        Publications exposed through the Galaxy citation export.
        DOIs are written without trailing punctuation so they can be resolved, and the
        BibTeX entry has balanced braces with the full journal name in the journal field.
    -->
    <citations>
        <!-- lmerTest package (Kuznetsova et al., 2017) -->
        <citation type="doi">10.18637/jss.v082.i13</citation>
        <!-- Benjamini and Hochberg (1995), given as BibTeX -->
        <citation type="bibtex">@ARTICLE{Benjamini1995,
           author = {Benjamini, Y. and Hochberg, Y.},
           title = {Controlling the false discovery rate: a practical and powerful approach to multiple testing},
           journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
           year = {1995},
           volume = {57},
           pages = {289-300}
        }</citation>
        <citation type="doi">10.1093/bioinformatics/btu813</citation>
    </citations>

</tool>