comparison summary_statistics.xml @ 0:46ddb0591d8b draft default tip

planemo upload commit a2411926bebc2ca3bb31215899a9f18a67e59556
author vmarcon
date Thu, 18 Jan 2018 07:44:37 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:46ddb0591d8b
1 <!--# Copyright (C) 2017 INRA
2 # This program is free software: you can redistribute it and/or modify
3 # it under the terms of the GNU General Public License as published by
4 # the Free Software Foundation, either version 3 of the License, or
5 # (at your option) any later version.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU General Public License
13 # along with this program. If not, see http://www.gnu.org/licenses/.
14 #-->
15
16 <tool id="summary_statistics" name="Summary statistics" version="1.0.0">
17 <description>Produce simple descriptive statistics from a numerical table</description>
18 <requirements> <!-- Packages this wrapper declares as dependencies; none of them is pinned to a version. NOTE(review): presumably the Bioconductor/R libraries are loaded by the R script itself; confirm against summary_statistics_galaxy.R. -->
19 <requirement type="package">R</requirement>
20 <requirement type="package">bioconductor-edger</requirement>
21 <requirement type="package">bioconductor-limma</requirement>
22 <requirement type="package">r-batch</requirement>
23 <requirement type="package">r-locfit</requirement>
24 </requirements>
25 <stdio>
26 <!-- Every non-zero exit status, positive or negative, is reported as a fatal error -->
27 <exit_code level="fatal" range="1:" />
28 <exit_code level="fatal" range=":-1" />
29 </stdio>
30 <!-- Runs summary_statistics_galaxy.R from the tool directory with Rscript, passing alternating key / value arguments. stat_chosen and plot_chosen are emitted only when their selector is "T". The explicit Rscript call replaces the deprecated interpreter attribute. --> <command><![CDATA[
31 Rscript '$__tool_directory__/summary_statistics_galaxy.R'
32 file_in '${file_in}'
33 NA_code '${NA_code}'
34 stat '${stat_cond.stat}'
35 #if $stat_cond.stat =="T":
36 stat_chosen '${stat_cond.stat_chosen}'
37 #end if
38 ploting '${ploting_cond.ploting}'
39 #if $ploting_cond.ploting =="T":
40 plot_chosen '${ploting_cond.plot_chosen}'
41 #end if
42 table_file '${table_file}'
43 graph_file '${graph_file}'
44 log_file '${log_file}'
45 ]]></command>
46 <inputs> <!-- One data table, the missing-value label, and two optional sections (statistics, plots) that are both off by default -->
47 <param format="csv,tabular" name="file_in" type="data" label="Input File" />
48 <param name="NA_code" size="30" type="text" value="NA" label="Label used for Missing values" />
49 <conditional name="stat_cond"> <!-- The statistic checkboxes are only offered when "Statistics table" is Yes -->
50 <param name="stat" type="select" help="Do you want to compute some basic statistics?" label="Statistics table">
51 <option value="T">Yes</option>
52 <option value="F" selected="true">No</option>
53 </param>
54 <when value="T">
55 <param name="stat_chosen" type="select" display="checkboxes" multiple="True" label="Chosen statistic(s)">
56 <option value="mean">mean</option>
57 <option value="sd">sd</option>
58 <option value="variance">variance</option>
59 <option value="median">median</option>
60 <option value="quartile">quartile</option>
61 <option value="decile">decile</option>
62 <validator type="empty_field" message="Please choose at least one statistic representation" />
63 </param>
64 </when>
65 </conditional>
66 <conditional name="ploting_cond"> <!-- The plot checkboxes are only offered when "Plots" is Yes; the "ploting" spelling is kept because the command and output filters reference these names -->
67 <param name="ploting" type="select" help="Do you want some standard plots?" label="Plots">
68 <option value="T">Yes</option>
69 <option value="F" selected="true">No</option>
70 </param>
71 <when value="T">
72 <param name="plot_chosen" type="select" display="checkboxes" multiple="True" label="Chosen plot(s)">
73 <option value="boxplot">boxplot</option>
74 <option value="histogram">histogram</option>
75 <option value="density">density</option>
76 <option value="pairsplot">pairsplot</option>
77 <option value="MAplot">MAplot</option>
78 <validator type="empty_field" message="Please choose at least one plotting representation." />
79 </param>
80 </when>
81 </conditional>
82 </inputs>
83 <outputs> <!-- The log is declared unconditionally; the table and the PDF are filtered on their respective Yes/No selector -->
84 <data name="log_file" format="html" label="Summary_statistics_log" />
85 <data name="table_file" format="tabular" label="Summary_statistics_report.tsv">
86 <filter>stat_cond['stat'] == 'T'</filter>
87 </data>
88 <data name="graph_file" format="pdf" label="Summary_statistics_report.pdf">
89 <filter>ploting_cond['ploting'] == 'T'</filter>
90 </data>
91 </outputs>
92 <tests>
93 <test> <!-- Runs decathlon.tsv with every statistic and every plot enabled -->
94 <param name="file_in" value="decathlon.tsv"/>
95 <param name="NA_code" value="NA"/> <!-- must match the declared input name exactly; parameter names are case-sensitive -->
96 <conditional name="stat_cond">
97 <param name="stat" value="T"/>
98 <param name="stat_chosen" value="mean,sd,variance,median,quartile,decile"/>
99 </conditional>
100 <conditional name="ploting_cond">
101 <param name="ploting" value="T"/>
102 <param name="plot_chosen" value="boxplot,histogram,density,pairsplot,MAplot"/>
103 </conditional>
104 <output name="log_file" file="log_file"/>
105 <output name="table_file" file="table_file"/>
106 <output name="graph_file" file="graph_file" compare="sim_size"/> <!-- the PDF is compared by size (sim_size), not by content -->
107 </test>
108 </tests>
109 <help><![CDATA[
110
111 ==================
112 Summary statistics
113 ==================
114
115 -----------
116 Description
117 -----------
118
119 - This tool is part of a set of statistical tools made by members of the BIOS4BIOL group ("Normalization", "Summary statistics", "Hierarchical clustering" and "PCAFactoMineR").
120 - Please use the Normalization module that comes with the suite before using this module.
121
122
123 What it does:
124 - This program produces simple descriptive statistics from a numerical table. Statistical measures are computed for each column in the table.
125
126 ------
127
128 -----------
129 Input files
130 -----------
131
132 +---------------------------+------------+
133 | Parameter : num + label | Format |
134 +===========================+============+
135 | 1 : input file | tabular |
136 +---------------------------+------------+
137
138 The input table should be a tab-separated file. If your dataset is available in your current history but cannot be selected via the Input File drop-down list, it may be due to an incorrect format.
139 Please first check that your data are actually tab-delimited. If they are, then you can redefine the Galaxy format of your table as "tabular" by editing its attributes ("Datatype" panel) [`See the schematic explanation here`_].
140
141
142 .. _See the schematic explanation here: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/ChangeDatatype.pdf
143
144 The first line should be a header (naming columns) and each line must begin with a row name, like the example below:
145
146 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/input_count_file.png
147
148
149
150 ----------
151 Parameters
152 ----------
153
154 Label used for Missing values
155 | Missing value coding characters
156 |
157
158 Statistics table
159 | if YES, allows you to choose the statistic(s) you want in your report
160 |
161
162 Chosen statistic(s)
163 | select the statistics you want in your report (see "Available statistics and plots" below)
164 |
165
166 Plots
167 | if YES, allows you to choose the plot(s) you want in your report
168 |
169 Chosen plot(s)
170 | select the plots you want in your report (see "Available statistics and plots" below)
171 |
172
173 ------------------------------
174 Available statistics and plots
175 ------------------------------
176
177 **Numerical statistical measures provided are the following ones:**
178
179
180 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_all.png
181 :width: 500
182
183
184 **Available plots:**
185
186
187 * boxplot
188
189
190 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_boxplot.png
191
192 (source : SAS documentation)
193
194 * histogram
195
196
197 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_histo.png
198 :width: 285
199
200 In this example, about 45 values of the dataset are greater than 0 and lower than 0.5
201
202 * density
203
204
205 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_density.png
206 :width: 275
207
208 This option computes kernel density estimates (gaussian smoothing).
209 While a histogram displays the observed distribution of a numerical variable, a density plot shows the estimated distribution of the theoretical continuous variable.
210
211 * pairsplot
212
213
214 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_pairs.png
215 :width: 475
216
217 In this example, we have represented a pairs plot for a table with three columns, named
218 "a", "b" and "c". Each plot represents the values of a given column scaled on the x axis versus
219 the values of another column scaled on the y axis.
220
221 * MAplot
222
223
224 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_maplot.png
225 :width: 275
226
227 Designed for genomic data (count data - only positive values accepted).
228 Each plot compares two samples.
229 The ordinate axis (M) represents the log ratios (binary logarithm) whereas the abscissa axis (A) corresponds to the means of log values.
230
231
232
233 ------------
234 Output files
235 ------------
236
237 Summary_statistics_report.tsv
238 | contains a table with all the requested statistics
239 |
240
241 Summary_statistics_report.pdf
242 | contains all the requested graphics
243 |
244
245 Summary_statistics_log
246 |
247 |
248
249 ------
250
251 **Authors** Luc Jouneau (luc.jouneau@inra.fr), Mélanie Pétéra (melanie.petera@inra.fr), Sarah Maman (sarah.maman@inra.fr) and Valentin Marcon (valentin.marcon@inra.fr)
252
253 Contact : support.sigenae@inra.fr
254
255 E-learning available : Not yet.
256
257 - Information :
258
259 Tool coded in the R language:
260 | *R Core Team. R: A language and environment for statistical computing. R*
261 | *Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.*
262
263 .. class:: infomark
264
265 -------------
266 Please cite :
267 -------------
268
269 - (Depending on the help provided you can cite us in acknowledgements, references or both.)
270
271 Acknowledgements
272 | We wish to thank the SIGENAE group and the statistical CATI BIOS4Biol group : Mélanie Pétéra, Sarah Maman, Luc Jouneau
273 | Re-packaging was provided by Valentin Marcon (INRA, Migale platform http://migale.jouy.inra.fr), as part of the IFB project 'Galaxy For Life Science' (http://www.france-bioinformatique.fr/fr)
274 |
275
276 References
277 | SIGENAE [http://www.sigenae.org/]
278
279 ]]></help>
280 </tool>