Mercurial > repos > vmarcon > summary_statistics
diff summary_statistics.xml @ 0:46ddb0591d8b draft default tip
planemo upload commit a2411926bebc2ca3bb31215899a9f18a67e59556
author | vmarcon |
---|---|
date | Thu, 18 Jan 2018 07:44:37 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/summary_statistics.xml Thu Jan 18 07:44:37 2018 -0500
@@ -0,0 +1,289 @@
+<!--# Copyright (C) 2017 INRA
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see http://www.gnu.org/licenses/.
+#-->
+
+<tool id="summary_statistics" name="Summary statistics" version="1.0.0">
+    <description>Produce simple descriptive statistics from a numerical table</description>
+    <requirements>
+        <requirement type="package">R</requirement>
+        <requirement type="package">bioconductor-edger</requirement>
+        <requirement type="package">bioconductor-limma</requirement>
+        <requirement type="package">r-batch</requirement>
+        <requirement type="package">r-locfit</requirement>
+    </requirements>
+    <stdio>
+        <!-- Any non-zero exit code, positive or negative, marks the job as failed. -->
+        <exit_code range="1:" level="fatal" />
+        <exit_code range=":-1" level="fatal" />
+    </stdio>
+    <!-- The R script receives its options as "name value" pairs. stat_chosen and
+         plot_chosen are only passed when the matching conditional is set to "T".
+         The script is called through $__tool_directory__ because the "interpreter"
+         attribute of the command tag is deprecated. -->
+    <command><![CDATA[
+        Rscript '$__tool_directory__/summary_statistics_galaxy.R'
+        file_in '${file_in}'
+        NA_code '${NA_code}'
+        stat '${stat_cond.stat}'
+        #if $stat_cond.stat =="T":
+            stat_chosen '${stat_cond.stat_chosen}'
+        #end if
+        ploting '${ploting_cond.ploting}'
+        #if $ploting_cond.ploting =="T":
+            plot_chosen '${ploting_cond.plot_chosen}'
+        #end if
+        table_file '${table_file}'
+        graph_file '${graph_file}'
+        log_file '${log_file}'
+    ]]></command>
+    <inputs>
+        <param format="csv,tabular" name="file_in" type="data" label="Input File" />
+        <param name="NA_code" size="30" type="text" value="NA" label="Label used for Missing values" />
+        <conditional name="stat_cond">
+            <param name="stat" type="select" help="Do you want to compute some basic statistics?" label="Statistics table">
+                <option value="T">Yes</option>
+                <option value="F" selected="true">No</option>
+            </param>
+            <when value="T">
+                <param name="stat_chosen" type="select" display="checkboxes" multiple="true" label="Chosen statistic(s)">
+                    <option value="mean">mean</option>
+                    <option value="sd">sd</option>
+                    <option value="variance">variance</option>
+                    <option value="median">median</option>
+                    <option value="quartile">quartile</option>
+                    <option value="decile">decile</option>
+                    <validator type="empty_field" message="Please choose at least one statistic representation" />
+                </param>
+            </when>
+        </conditional>
+        <conditional name="ploting_cond">
+            <param name="ploting" type="select" help="Do you want some standard plots?" label="Plots">
+                <option value="T">Yes</option>
+                <option value="F" selected="true">No</option>
+            </param>
+            <when value="T">
+                <param name="plot_chosen" type="select" help="" display="checkboxes" multiple="true" label="Chosen plot(s)">
+                    <option value="boxplot">boxplot</option>
+                    <option value="histogram">histogram</option>
+                    <option value="density">density</option>
+                    <option value="pairsplot">pairsplot</option>
+                    <option value="MAplot">MAplot</option>
+                    <validator type="empty_field" message="Please choose at least one plotting representation." />
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <!-- The log is always produced; the table and the PDF are only created when
+         the corresponding conditional is set to "T". -->
+    <outputs>
+        <data format="html" name="log_file" label="Summary_statistics_log" />
+        <data format="tabular" name="table_file" label="Summary_statistics_report.tsv" >
+            <filter>(stat_cond['stat'] == 'T')</filter>
+        </data>
+        <data format="pdf" name="graph_file" label="Summary_statistics_report.pdf" >
+            <filter>(ploting_cond['ploting'] == 'T')</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <!-- Parameter names below must match the names declared in the inputs
+                 section exactly (they are case-sensitive). -->
+            <param name="file_in" value="decathlon.tsv"/>
+            <param name="NA_code" value="NA"/>
+            <conditional name="stat_cond">
+                <param name="stat" value="T"/>
+                <param name="stat_chosen" value="mean,sd,variance,median,quartile,decile"/>
+            </conditional>
+            <conditional name="ploting_cond">
+                <param name="ploting" value="T"/>
+                <param name="plot_chosen" value="boxplot,histogram,density,pairsplot,MAplot"/>
+            </conditional>
+            <output name="log_file" file="log_file"/>
+            <output name="table_file" file="table_file"/>
+            <output name="graph_file" file="graph_file" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+==================
+Summary statistics
+==================
+
+-----------
+Description
+-----------
+
+    - This tool is part of a set of statistical tools made by members of the BIOS4BIOL group ("Normalization", "Summary statistics", "Hierarchical clustering" and "PCAFactoMineR").
+    - Please use the Normalization module that comes with the suite before using this module.
+
+
+What it does:
+    - This program produces simple descriptive statistics from a numerical table. Statistical measures are computed for each column in the table.
+
+------
+
+-----------
+Input files
+-----------
+
++---------------------------+------------+
+| Parameter : num + label   | Format     |
++===========================+============+
+| 1 : input file            | tabular    |
++---------------------------+------------+
+
+The input table should be a tab-separated file. If your dataset is available in your current history but cannot be selected via the Input File drop-down list, it may be due to an incorrect format.
+Please first check that your data are actually tab-separated. If they are, you can change the Galaxy format of your table to "tabular" by editing its attributes ("Datatype" panel) [`See the schematic explanation here`_].
+
+
+.. _See the schematic explanation here: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/ChangeDatatype.pdf
+
+The first line should be a header (naming columns) and each line must begin with a row name, like the example below:
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/input_count_file.png
+
+
+
+----------
+Parameters
+----------
+
+Label used for Missing values
+    | Missing value coding characters
+    |
+
+Statistics table
+    | if YES, allows you to choose the statistic(s) you want in your report
+    |
+
+Chosen statistic(s)
+    | select the statistics you want in your report (see below "Available statistics and plots")
+    |
+
+Plots
+    | if YES, allows you to choose the plot(s) you want in your report
+    |
+
+Chosen plot(s)
+    | select the plots you want in your report (see below "Available statistics and plots")
+    |
+
+------------------------------
+Available statistics and plots
+------------------------------
+
+**Numerical statistical measures provided are the following ones:**
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_all.png
+    :width: 500
+
+
+**Available plots:**
+
+
+    * boxplot
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_boxplot.png
+
+(source : SAS documentation)
+
+    * histogram
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_histo.png
+    :width: 285
+
+In this example, about 45 values of the dataset are greater than 0 and lower than 0.5
+
+    * density
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_density.png
+    :width: 275
+
+This option computes kernel density estimates (gaussian smoothing).
+While a histogram displays the observed distribution of a numerical variable, a density plot shows the estimated distribution of the theoretical continuous variable.
+
+    * pairsplot
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_pairs.png
+    :width: 475
+
+In this example, we have represented a pairs plot for a table with three columns, named
+"a", "b" and "c". Each plot represents the values of a given column scaled on the x axis versus
+the values of another column scaled on the y axis.
+
+    * MAplot
+
+
+.. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_maplot.png
+    :width: 275
+
+Designed for genomic data (count data - only positive values accepted).
+Each plot compares two samples.
+The ordinate axis (M) represents the log ratios (binary logarithm) whereas the abscissa axis (A) corresponds to the means of log values.
+
+
+
+------------
+Output files
+------------
+
+Summary_statistics_report.tsv
+    | contains a table with all the requested statistics
+    |
+
+Summary_statistics_report.pdf
+    | contains all the requested graphics
+    |
+
+Summary_statistics_log
+    | contains the log of the run (HTML format)
+    |
+
+------
+
+**Authors** Luc Jouneau (luc.jouneau@inra.fr), Mélanie Pétéra (melanie.petera@inra.fr), Sarah Maman (sarah.maman@inra.fr) and Valentin Marcon (valentin.marcon@inra.fr)
+
+Contact : support.sigenae@inra.fr
+
+E-learning available : Not yet.
+
+- Information :
+
+Tool coded in the R language:
+    | *R Core Team. R: A language and environment for statistical computing. R*
+    | *Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.*
+
+.. class:: infomark
+
+-------------
+Please cite :
+-------------
+
+- (Depending on the help provided you can cite us in acknowledgements, references or both.)
+
+Acknowledgements
+    | We wish to thank the SIGENAE group and the statistical CATI BIOS4BIOL group : Mélanie Pétéra, Sarah Maman, Luc Jouneau
+    | Re-packaging was provided by Valentin Marcon (INRA, Migale platform http://migale.jouy.inra.fr), as part of the IFB project 'Galaxy For Life Science' (http://www.france-bioinformatique.fr/fr)
+    |
+
+References
+    | SIGENAE [http://www.sigenae.org/]
+
+    ]]></help>
+</tool>