view summary_statistics.xml @ 0:46ddb0591d8b draft default tip

planemo upload commit a2411926bebc2ca3bb31215899a9f18a67e59556
author vmarcon
date Thu, 18 Jan 2018 07:44:37 -0500
parents
children
line wrap: on
line source

<!--# Copyright (C) 2017 INRA
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/.
#-->

<tool id="summary_statistics" name="Summary statistics" version="1.0.0">
    <description>Produce simple descriptive statistics from a numerical table</description>
    <!-- Software packages Galaxy must resolve before the wrapped R script can run. -->
    <!-- NOTE(review): no requirement pins a version, so the resolved environment may
         differ between installations; consider adding version attributes. -->
    <!-- NOTE(review): the Conda package for the R interpreter is usually named r-base;
         confirm that "R" resolves with the dependency resolver in use. -->
    <requirements>
        <requirement type="package">R</requirement>
        <requirement type="package">bioconductor-edger</requirement>
        <requirement type="package">bioconductor-limma</requirement>
        <requirement type="package">r-batch</requirement>
        <requirement type="package">r-locfit</requirement>
    </requirements>
    <stdio>
        <!-- Every non-zero exit status, negative or positive, marks the job as failed -->
        <exit_code level="fatal" range=":-1" />
        <exit_code level="fatal" range="1:" />
    </stdio>
    <!-- Runs summary_statistics_galaxy.R from the tool directory with Rscript.
         The deprecated interpreter attribute is replaced by an explicit Rscript call
         on $__tool_directory__. Arguments are passed as alternating name / value
         tokens; stat_chosen and plot_chosen are only emitted when the matching
         option is set to "T". -->
    <command><![CDATA[
        Rscript '$__tool_directory__/summary_statistics_galaxy.R'
        file_in '${file_in}'
        NA_code '${NA_code}'
        stat '${stat_cond.stat}'
        #if $stat_cond.stat == "T":
          stat_chosen '${stat_cond.stat_chosen}'
        #end if
        ploting '${ploting_cond.ploting}'
        #if $ploting_cond.ploting == "T":
          plot_chosen '${ploting_cond.plot_chosen}'
        #end if
        table_file '${table_file}'
        graph_file '${graph_file}'
        log_file '${log_file}'
    ]]></command>
    <inputs>
        <!-- Table to summarise, and the token that marks missing values in it -->
        <param format="csv,tabular" name="file_in" type="data" label="Input File" />
        <param name="NA_code" size="30" type="text" value="NA" label="Label used for Missing values" />
        <!-- Statistics table: the list of statistics is only offered when "Yes" (T) is selected -->
        <conditional name="stat_cond">
            <param name="stat" type="select" help="Do you want to compute some basic statistics?" label="Statistics table">
                <option value="T">Yes</option>
                <option value="F" selected="true">No</option>
            </param>
            <when value="T">
                <!-- NOTE(review): confirm that the empty_field validator really rejects an
                     empty checkbox selection for a multiple select. -->
                <param name="stat_chosen" type="select" display="checkboxes" multiple="true" label="Chosen statistic(s)">
                    <option value="mean">mean</option>
                    <option value="sd">sd</option>
                    <option value="variance">variance</option>
                    <option value="median">median</option>
                    <option value="quartile">quartile</option>
                    <option value="decile">decile</option>
                    <validator type="empty_field" message="Please choose at least one statistic representation" />
                </param>
            </when>
        </conditional>
        <!-- Plots: the list of plots is only offered when "Yes" (T) is selected.
             The parameter names keep the historical "ploting" spelling because the
             command line and the output filters refer to them. -->
        <conditional name="ploting_cond">
            <param name="ploting" type="select" help="Do you want some standard plots?" label="Plots">
                <option value="T">Yes</option>
                <option value="F" selected="true">No</option>
            </param>
            <when value="T">
                <param name="plot_chosen" type="select" display="checkboxes" multiple="true" label="Chosen plot(s)">
                    <option value="boxplot">boxplot</option>
                    <option value="histogram">histogram</option>
                    <option value="density">density</option>
                    <option value="pairsplot">pairsplot</option>
                    <option value="MAplot">MAplot</option>
                    <validator type="empty_field" message="Please choose at least one plotting representation." />
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Run log: declared without a filter, so it is always produced -->
        <data name="log_file" format="html" label="Summary_statistics_log" />
        <!-- Statistics table: only created when the statistics option is set to T -->
        <data name="table_file" format="tabular" label="Summary_statistics_report.tsv">
            <filter>stat_cond['stat'] == 'T'</filter>
        </data>
        <!-- Plot report: only created when the plots option is set to T -->
        <data name="graph_file" format="pdf" label="Summary_statistics_report.pdf">
            <filter>ploting_cond['ploting'] == 'T'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Full run: every statistic and every plot requested on decathlon.tsv.
             The PDF is compared by size only (sim_size). -->
        <test>
            <param name="file_in" value="decathlon.tsv"/>
            <!-- Parameter names are case-sensitive: this must match the NA_code input -->
            <param name="NA_code" value="NA"/>
            <conditional name="stat_cond">
                <param name="stat" value="T"/>
                <param name="stat_chosen" value="mean,sd,variance,median,quartile,decile"/>
            </conditional>
            <conditional name="ploting_cond">
                <param name="ploting" value="T"/>
                <param name="plot_chosen" value="boxplot,histogram,density,pairsplot,MAplot"/>
            </conditional>
            <output name="log_file" file="log_file"/>
            <output name="table_file" file="table_file"/>
            <output name="graph_file" file="graph_file" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[

==================
Summary statistics
==================

-----------
Description
-----------

 - This tool is part of a set of statistical tools made by members of the BIOS4BIOL group ("Normalization", "Summary statistics", "Hierarchical clustering" and "PCAFactoMineR").
 - Please use the Normalization module that comes with the suite before using this module.


What it does:
 - This program produces simple descriptive statistics from a numerical table. Statistical measures are computed for each column in the table.

------

-----------
Input files
-----------

+---------------------------+------------+
| Parameter : num + label   |   Format   |
+===========================+============+
| 1 : input file            |   tabular  |
+---------------------------+------------+

The input table should be a tab-separated file. If your dataset is available in your current history but cannot be selected via the Input File drop-down list, it may be due to an incorrect format.
Please first check that your data are actually tab-separated. If they are, you can then set the Galaxy format of your table to "tabular" by editing its attributes ("Datatype" panel) [`See the schematic explanation here`_].


.. _See the schematic explanation here: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/ChangeDatatype.pdf

The first line should be a header (naming columns) and each line must begin with a row name, like the example below: 

.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/input_count_file.png



----------
Parameters
----------

Label used for Missing values
        | Missing value coding characters
        |

Statistics table
        | if Yes, allows you to choose the statistic(s) you want in your report
        |

Chosen statistic(s)
        | select the statistics you want in your report (see "Available statistics and plots" below)
        |

Plots
        | if Yes, allows you to choose the plot(s) you want in your report
        |

Chosen plot(s)
        | select the plots you want in your report (see "Available statistics and plots" below)
        |

------------------------------
Available statistics and plots
------------------------------

**Numerical statistical measures provided are the following ones:**


.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_all.png
        :width: 500


**Available plots:**


	* boxplot


.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_boxplot.png

(source : SAS documentation)

	* histogram


.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_histo.png
        :width: 285

In this example, about 45 values of the dataset are greater than 0 and lower than 0.5

	* density


.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_density.png
        :width: 275

This option computes kernel density estimates (gaussian smoothing).
While a histogram displays the observed distribution of a numerical variable, a density plot lets you view the estimated distribution of the theoretical continuous variable.

	* pairsplot


.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_pairs.png
        :width: 475

In this example, we have represented a pairs plot for a table with three columns, named 
"a", "b" and "c". Each plot represents the values of a given column scaled on the x axis versus
the values of another column scaled on the y axis.

	* MAplot


.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_maplot.png
        :width: 275

Designed for genomic data (count data - only positive values accepted).
Each plot compares two samples.
The ordinate axis (M) represents the log ratios (binary logarithm) whereas the abscissa axis (A) corresponds to the means of log values. 



------------
Output files
------------

Summary_statistics_report.tsv
	| contains a table with all the requested statistics
        |
 
Summary_statistics_report.pdf
	| contains all the requested graphics
        | 

Summary_statistics_log
        | contains the log of the run (HTML format)
        |
 
------

**Authors** Luc Jouneau (luc.jouneau@inra.fr), Mélanie Pétéra (melanie.petera@inra.fr), Sarah Maman (sarah.maman@inra.fr) and Valentin Marcon (valentin.marcon@inra.fr)

Contact : support.sigenae@inra.fr

E-learning available : Not yet.

- Information :

Tool coded in the R language:
	| *R Core Team. R: A language and environment for statistical computing. R*
	| *Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.*

.. class:: infomark

-------------
Please cite :
-------------

- (Depending on the help provided you can cite us in acknowledgements, references or both.)
    
Acknowledgements
        | We wish to thank SIGENAE group and the statistical CATI BIOS4Biol group : Mélanie Pétéra, Sarah Maman, Luc Jouneau
        | Re-packaging was provided by Valentin Marcon (INRA, Migale platform http://migale.jouy.inra.fr), as part of the IFB project 'Galaxy For Life Science' (http://www.france-bioinformatique.fr/fr)
        |   
 
References
        | SIGENAE [http://www.sigenae.org/]

    ]]></help>
</tool>