diff summary_statistics.xml @ 0:46ddb0591d8b draft default tip

planemo upload commit a2411926bebc2ca3bb31215899a9f18a67e59556
author vmarcon
date Thu, 18 Jan 2018 07:44:37 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/summary_statistics.xml	Thu Jan 18 07:44:37 2018 -0500
@@ -0,0 +1,280 @@
+<!--# Copyright (C) 2017 INRA
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see http://www.gnu.org/licenses/.
+#-->
+
+<tool id="summary_statistics" name="Summary statistics" version="1.0.0">
+    <description>Produce simple descriptive statistics from a numerical table</description>
+    <requirements>
+        <requirement type="package">R</requirement>
+        <requirement type="package">bioconductor-edger</requirement>
+        <requirement type="package">bioconductor-limma</requirement>
+        <requirement type="package">r-batch</requirement>
+        <requirement type="package">r-locfit</requirement>
+    </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" level="fatal" />
+        <exit_code range=":-1" level="fatal" />
+    </stdio>
+    <command interpreter="Rscript"><![CDATA[
+        summary_statistics_galaxy.R
+        file_in '${file_in}'
+        NA_code '${NA_code}'
+        stat '${stat_cond.stat}'
+        #if $stat_cond.stat =="T":
+          stat_chosen '${stat_cond.stat_chosen}'
+        #end if
+        ploting  '${ploting_cond.ploting}'
+        #if $ploting_cond.ploting =="T":
+          plot_chosen '${ploting_cond.plot_chosen}'
+        #end if
+        table_file '${table_file}'
+        graph_file '${graph_file}'
+        log_file '${log_file}'
+    ]]></command>
+    <inputs>
+        <param format="csv,tabular" name="file_in" type="data" label="Input File" />
+        <param name="NA_code" size="30" type="text" value="NA" label="Label used for Missing values" />
+        <conditional name="stat_cond">
+            <param name="stat" type="select" help="Do you want to compute some basic statistics?" label="Statistics table">
+                <option value="T">Yes</option>
+                <option value="F" selected="true">No</option>
+            </param>
+            <when value="T">
+                <param name="stat_chosen" type="select" display="checkboxes" multiple="True" label="Chosen statistic(s)">
+                    <option value="mean">mean</option>
+                    <option value="sd">sd</option>
+                    <option value="variance">variance</option>
+                    <option value="median">median</option>
+                    <option value="quartile">quartile</option>
+                    <option value="decile">decile</option>
+                    <validator type="empty_field" message="Please choose at least one statistic representation" />
+                </param>
+            </when>
+        </conditional>
+        <conditional name="ploting_cond">
+            <param name="ploting" type="select" help="Do you want some standard plots?" label="Plots">
+                <option value="T">Yes</option>
+                <option value="F" selected="true">No</option>
+            </param>
+            <when value="T">
+                <param name="plot_chosen" type="select" help="" display="checkboxes" multiple="True" label="Chosen plot(s)">
+                    <option value="boxplot">boxplot</option>
+                    <option value="histogram">histogram</option>
+                    <option value="density">density</option>
+                    <option value="pairsplot">pairsplot</option>
+                    <option value="MAplot">MAplot</option>
+                    <validator type="empty_field" message="Please choose at least one ploting representation." />
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="html" name="log_file" label="Summary_statistics_log" />
+        <data format="tabular" name="table_file" label="Summary_statistics_report.tsv" >
+            <filter>(stat_cond['stat'] == 'T')</filter>
+        </data>
+        <data format="pdf" name="graph_file" label="Summary_statistics_report.pdf" >
+            <filter>(ploting_cond['ploting'] == 'T')</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="file_in" value="decathlon.tsv"/>
+            <param name="Na_code" value="NA"/>
+            <conditional name="stat_cond">
+                <param name="stat" value="T"/>
+                <param name="stat_chosen" value="mean,sd,variance,median,quartile,decile"/>
+            </conditional>
+            <conditional name="ploting_cond">
+                <param name="ploting" value="T"/>
+                <param name="plot_chosen" value="boxplot,histogram,density,pairsplot,MAplot"/>
+            </conditional>
+            <output name="log_file" file="log_file"/>
+            <output name="table_file" file="table_file"/>
+            <output name="graph_file" file="graph_file" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+==================
+Summary statistics
+==================
+
+-----------
+Description
+-----------
+
+ - This tool is part of a set of statistical tools made by members of the BIOS4BIOL group ("Normalization", "Summary statistics", "Hierarchical clustering" and "PCAFactoMineR").
+ - Please use the Normalization module that come with the suite before using this module.
+
+
+What it does:
+ - This program produces simple descriptive statistics from a numerical table. Statistical measures are computed for each column in the table.
+
+------
+
+-----------
+Input files
+-----------
+
++---------------------------+------------+
+| Parameter : num + label   |   Format   |
++===========================+============+
+| 1 : input file            |   tabular  |
++---------------------------+------------+
+
+The input table should be a tabulation-separated file. If your dataset is available in your current history but can not be selected via the Input File drop-down list, it may be due to incorrect format.
+Please first check that your data are effectively tabulated. If it is, then you can redefine the Galaxy format of your table into "tabular" editing its attributes ("Datatype" panel) [`See the schematic explanation here`_]. 
+
+
+.. _See the schematic explanation here: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/ChangeDatatype.pdf
+
+The first line should be a header (naming columns) and each line must begin with a row name, like the example below: 
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/input_count_file.png
+
+
+
+----------
+Parameters
+----------
+
+Label used for Missing values
+        | Missing value coding characters
+        |
+
+statistics table
+        | if YES, allow you to choose statistic(s) you want in your report
+        |
+
+Chosen statistic(s)
+        | select the statistics you want in your report (see above "Available statistics and plots")
+        |
+
+Plots
+        | if YES, allow you to choose plot(s) you want in your report
+        |
+Chosen plot(s)
+        | select the plots you want in your report (see above "Available statistics and plots")
+        |
+
+------------------------------
+Available statistics and plots
+------------------------------
+
+**Numerical statistical measures provided are the following ones:**
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_all.png
+        :width: 500
+
+
+**Available plots:**
+
+
+	* boxplot
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_boxplot.png
+
+(source : SAS documentation)
+
+	* histogram
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_histo.png
+        :width: 285
+
+In this example, about 45 values of the dataset are greater than 0 and lower than 0.5
+
+	* density
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_density.png
+        :width: 275
+
+This option computes kernel density estimates (gaussian smoothing).
+While a histogram displays the observed distribution of a numerical variable, a density plot allows to view the estimated distribution of the theoretical continuous variable. 
+
+	* pairsplot
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_pairs.png
+        :width: 475
+
+In this example, we have represented a pairs plot for a table with three columns, named 
+"a", "b" and "c". Each plot represents the values of a given column scaled on the x axis versus
+the values of another column scaled on the y axis.
+
+	* MAplot
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_maplot.png
+        :width: 275
+
+Designed for genomic data (count data - only positive values accepted).
+Each plot allows to compare two samples. 
+The ordinate axis (M) represents the log ratios (binary logarithm) whereas the abscissa axis (A) corresponds to the means of log values. 
+
+
+
+------------
+Output files
+------------
+
+Summary_statistics_report.tsv
+	| contains a table with all the requested statistics
+        |
+ 
+Summary_statistics_report.pdf
+	| contains all the requested graphics
+        | 
+
+Summary_statistics_log
+        |
+        |
+ 
+------
+
+**Authors** Luc Jouneau (luc.jouneau@inra.fr), Mélanie Pétéra (melanie.petera@inra.fr), Sarah Maman (sarah.maman@inra.fr) and Valentin Marcon (valentin.marcon@inra.fr)
+
+Contact : support.sigenae@inra.fr
+
+E-learning available : Not yet.
+
+- Information :
+
+Tool coded in the R language:
+	| *R Core Team. R: A language and environment for statistical computing. R*
+	| *Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.*
+
+.. class:: infomark
+
+-------------
+Please cite :
+-------------
+
+- (Depending on the help provided you can cite us in acknowledgements, references or both.)
+    
+Acknowledgements
+        | We wish to thank SIGENAE group and the statistical CATI BIOS4Biol group : Mélanie Pétéra, Sarah Maman, Luc Jouneau
+        | Re-packaging was provided by Valentin Marcon (INRA, Migale platform http://migale.jouy.inra.fr), as part of the IFB project 'Galaxy For Life Science' (http://www.france-bioinformatique.fr/fr)
+        |   
+ 
+References
+        | SIGENAE [http://www.sigenae.org/]
+
+    ]]></help>
+</tool>