Mercurial > repos > vmarcon > summary_statistics
comparison summary_statistics.xml @ 0:46ddb0591d8b draft default tip
planemo upload commit a2411926bebc2ca3bb31215899a9f18a67e59556
author | vmarcon |
---|---|
date | Thu, 18 Jan 2018 07:44:37 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:46ddb0591d8b |
---|---|
1 <!--# Copyright (C) 2017 INRA | |
2 # This program is free software: you can redistribute it and/or modify | |
3 # it under the terms of the GNU General Public License as published by | |
4 # the Free Software Foundation, either version 3 of the License, or | |
5 # (at your option) any later version. | |
6 # | |
7 # This program is distributed in the hope that it will be useful, | |
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
10 # GNU General Public License for more details. | |
11 # | |
12 # You should have received a copy of the GNU General Public License | |
13 # along with this program. If not, see http://www.gnu.org/licenses/. | |
14 #--> | |
15 | |
16 <tool id="summary_statistics" name="Summary statistics" version="1.0.0"> | |
17 <description>Produce simple descriptive statistics from a numerical table</description> | |
18 <requirements> | |
19 <requirement type="package">R</requirement> | |
20 <requirement type="package">bioconductor-edger</requirement> | |
21 <requirement type="package">bioconductor-limma</requirement> | |
22 <requirement type="package">r-batch</requirement> | |
23 <requirement type="package">r-locfit</requirement> | |
24 </requirements> | |
25 <stdio> | |
26 <!-- Anything other than zero is an error --> | |
27 <exit_code range="1:" level="fatal" /> | |
28 <exit_code range=":-1" level="fatal" /> | |
29 </stdio> | |
30 <command interpreter="Rscript"><![CDATA[ | |
31 summary_statistics_galaxy.R | |
32 file_in '${file_in}' | |
33 NA_code '${NA_code}' | |
34 stat '${stat_cond.stat}' | |
35 #if $stat_cond.stat =="T": | |
36 stat_chosen '${stat_cond.stat_chosen}' | |
37 #end if | |
38 ploting '${ploting_cond.ploting}' | |
39 #if $ploting_cond.ploting =="T": | |
40 plot_chosen '${ploting_cond.plot_chosen}' | |
41 #end if | |
42 table_file '${table_file}' | |
43 graph_file '${graph_file}' | |
44 log_file '${log_file}' | |
45 ]]></command> | |
46 <inputs> | |
47 <param format="csv,tabular" name="file_in" type="data" label="Input File" /> | |
48 <param name="NA_code" size="30" type="text" value="NA" label="Label used for Missing values" /> | |
49 <conditional name="stat_cond"> | |
50 <param name="stat" type="select" help="Do you want to compute some basic statistics?" label="Statistics table"> | |
51 <option value="T">Yes</option> | |
52 <option value="F" selected="true">No</option> | |
53 </param> | |
54 <when value="T"> | |
55 <param name="stat_chosen" type="select" display="checkboxes" multiple="True" label="Chosen statistic(s)"> | |
56 <option value="mean">mean</option> | |
57 <option value="sd">sd</option> | |
58 <option value="variance">variance</option> | |
59 <option value="median">median</option> | |
60 <option value="quartile">quartile</option> | |
61 <option value="decile">decile</option> | |
62 <validator type="empty_field" message="Please choose at least one statistic representation" /> | |
63 </param> | |
64 </when> | |
65 </conditional> | |
66 <conditional name="ploting_cond"> | |
67 <param name="ploting" type="select" help="Do you want some standard plots?" label="Plots"> | |
68 <option value="T">Yes</option> | |
69 <option value="F" selected="true">No</option> | |
70 </param> | |
71 <when value="T"> | |
72 <param name="plot_chosen" type="select" help="" display="checkboxes" multiple="True" label="Chosen plot(s)"> | |
73 <option value="boxplot">boxplot</option> | |
74 <option value="histogram">histogram</option> | |
75 <option value="density">density</option> | |
76 <option value="pairsplot">pairsplot</option> | |
77 <option value="MAplot">MAplot</option> | |
78 <validator type="empty_field" message="Please choose at least one ploting representation." /> | |
79 </param> | |
80 </when> | |
81 </conditional> | |
82 </inputs> | |
83 <outputs> | |
84 <data format="html" name="log_file" label="Summary_statistics_log" /> | |
85 <data format="tabular" name="table_file" label="Summary_statistics_report.tsv" > | |
86 <filter>(stat_cond['stat'] == 'T')</filter> | |
87 </data> | |
88 <data format="pdf" name="graph_file" label="Summary_statistics_report.pdf" > | |
89 <filter>(ploting_cond['ploting'] == 'T')</filter> | |
90 </data> | |
91 </outputs> | |
92 <tests> | |
93 <test> <!-- Full run on decathlon.tsv with every statistic and every plot selected; param names must match the <inputs> names exactly (case-sensitive). --> | |
94 <param name="file_in" value="decathlon.tsv"/> | |
95 <param name="NA_code" value="NA"/> | |
96 <conditional name="stat_cond"> | |
97 <param name="stat" value="T"/> | |
98 <param name="stat_chosen" value="mean,sd,variance,median,quartile,decile"/> | |
99 </conditional> | |
100 <conditional name="ploting_cond"> | |
101 <param name="ploting" value="T"/> | |
102 <param name="plot_chosen" value="boxplot,histogram,density,pairsplot,MAplot"/> | |
103 </conditional> | |
104 <output name="log_file" file="log_file"/> | |
105 <output name="table_file" file="table_file"/> | |
106 <output name="graph_file" file="graph_file" compare="sim_size"/> | |
107 </test> | |
108 </tests> | |
109 <help><![CDATA[ | |
110 | |
111 ================== | |
112 Summary statistics | |
113 ================== | |
114 | |
115 ----------- | |
116 Description | |
117 ----------- | |
118 | |
119 - This tool is part of a set of statistical tools made by members of the BIOS4BIOL group ("Normalization", "Summary statistics", "Hierarchical clustering" and "PCAFactoMineR"). | |
120 - Please use the Normalization module that comes with the suite before using this module. | |
121 | |
122 | |
123 What it does: | |
124 - This program produces simple descriptive statistics from a numerical table. Statistical measures are computed for each column in the table. | |
125 | |
126 ------ | |
127 | |
128 ----------- | |
129 Input files | |
130 ----------- | |
131 | |
132 +---------------------------+------------+ | |
133 | Parameter : num + label | Format | | |
134 +===========================+============+ | |
135 | 1 : input file | tabular | | |
136 +---------------------------+------------+ | |
137 | |
138 The input table should be a tab-separated file. If your dataset is available in your current history but cannot be selected via the Input File drop-down list, it may be due to an incorrect format. | |
139 Please first check that your data are actually tab-separated. If they are, you can then redefine the Galaxy format of your table as "tabular" by editing its attributes ("Datatype" panel) [`See the schematic explanation here`_]. | |
140 | |
141 | |
142 .. _See the schematic explanation here: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/ChangeDatatype.pdf | |
143 | |
144 The first line should be a header (naming columns) and each line must begin with a row name, like the example below: | |
145 | |
146 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/input_count_file.png | |
147 | |
148 | |
149 | |
150 ---------- | |
151 Parameters | |
152 ---------- | |
153 | |
154 Label used for Missing values | |
155 | Missing value coding characters | |
156 | | |
157 | |
158 statistics table | |
159 | if YES, allows you to choose the statistic(s) you want in your report | |
160 | | |
161 | |
162 Chosen statistic(s) | |
163 | select the statistics you want in your report (see below "Available statistics and plots") | |
164 | | |
165 | |
166 Plots | |
167 | if YES, allows you to choose the plot(s) you want in your report | |
168 | | |
169 Chosen plot(s) | |
170 | select the plots you want in your report (see below "Available statistics and plots") | |
171 | | |
172 | |
173 ------------------------------ | |
174 Available statistics and plots | |
175 ------------------------------ | |
176 | |
177 **Numerical statistical measures provided are the following ones:** | |
178 | |
179 | |
180 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_all.png | |
181 :width: 500 | |
182 | |
183 | |
184 **Available plots:** | |
185 | |
186 | |
187 * boxplot | |
188 | |
189 | |
190 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_boxplot.png | |
191 | |
192 (source : SAS documentation) | |
193 | |
194 * histogram | |
195 | |
196 | |
197 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_histo.png | |
198 :width: 285 | |
199 | |
200 In this example, about 45 values of the dataset are greater than 0 and lower than 0.5 | |
201 | |
202 * density | |
203 | |
204 | |
205 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_density.png | |
206 :width: 275 | |
207 | |
208 This option computes kernel density estimates (gaussian smoothing). | |
209 While a histogram displays the observed distribution of a numerical variable, a density plot lets you view the estimated distribution of the theoretical continuous variable. | |
210 | |
211 * pairsplot | |
212 | |
213 | |
214 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_pairs.png | |
215 :width: 475 | |
216 | |
217 In this example, we have represented a pairs plot for a table with three columns, named | |
218 "a", "b" and "c". Each plot represents the values of a given column scaled on the x axis versus | |
219 the values of another column scaled on the y axis. | |
220 | |
221 * MAplot | |
222 | |
223 | |
224 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_maplot.png | |
225 :width: 275 | |
226 | |
227 Designed for genomic data (count data - only positive values accepted). | |
228 Each plot compares two samples. | |
229 The ordinate axis (M) represents the log ratios (binary logarithm) whereas the abscissa axis (A) corresponds to the means of log values. | |
230 | |
231 | |
232 | |
233 ------------ | |
234 Output files | |
235 ------------ | |
236 | |
237 Summary_statistics_report.tsv | |
238 | contains a table with all the requested statistics | |
239 | | |
240 | |
241 Summary_statistics_report.pdf | |
242 | contains all the requested graphics | |
243 | | |
244 | |
245 Summary_statistics_log | |
246 | | |
247 | | |
248 | |
249 ------ | |
250 | |
251 **Authors** Luc Jouneau (luc.jouneau@inra.fr), Mélanie Pétéra (melanie.petera@inra.fr), Sarah Maman (sarah.maman@inra.fr) and Valentin Marcon (valentin.marcon@inra.fr) | |
252 | |
253 Contact : support.sigenae@inra.fr | |
254 | |
255 E-learning available : Not yet. | |
256 | |
257 - Information : | |
258 | |
259 Tool coded in the R language: | |
260 | *R Core Team. R: A language and environment for statistical computing. R* | |
261 | *Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.* | |
262 | |
263 .. class:: infomark | |
264 | |
265 ------------- | |
266 Please cite : | |
267 ------------- | |
268 | |
269 - (Depending on the help provided you can cite us in acknowledgements, references or both.) | |
270 | |
271 Acknowledgements | |
272 | We wish to thank SIGENAE group and the statistical CATI BIOS4Biol group : Mélanie Pétéra, Sarah Maman, Luc Jouneau | |
273 | Re-packaging was provided by Valentin Marcon (INRA, Migale platform http://migale.jouy.inra.fr), as part of the IFB project 'Galaxy For Life Science' (http://www.france-bioinformatique.fr/fr) | |
274 | | |
275 | |
276 References | |
277 | SIGENAE [http://www.sigenae.org/] | |
278 | |
279 ]]></help> | |
280 </tool> |