Mercurial > repos > vmarcon > summary_statistics
comparison summary_statistics.xml @ 0:46ddb0591d8b draft default tip
planemo upload commit a2411926bebc2ca3bb31215899a9f18a67e59556
| author | vmarcon |
|---|---|
| date | Thu, 18 Jan 2018 07:44:37 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:46ddb0591d8b |
|---|---|
| 1 <!--# Copyright (C) 2017 INRA | |
| 2 # This program is free software: you can redistribute it and/or modify | |
| 3 # it under the terms of the GNU General Public License as published by | |
| 4 # the Free Software Foundation, either version 3 of the License, or | |
| 5 # (at your option) any later version. | |
| 6 # | |
| 7 # This program is distributed in the hope that it will be useful, | |
| 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 10 # GNU General Public License for more details. | |
| 11 # | |
| 12 # You should have received a copy of the GNU General Public License | |
| 13 # along with this program. If not, see http://www.gnu.org/licenses/. | |
| 14 #--> | |
| 15 | |
| 16 <tool id="summary_statistics" name="Summary statistics" version="1.0.0"> | |
| 17 <description>Produce simple descriptive statistics from a numerical table</description> | |
| 18 <requirements> <!-- Packages Galaxy must provide before the command runs. NOTE(review): no requirement pins a version attribute, so resolved versions may drift - consider pinning. --> | |
| 19 <requirement type="package">R</requirement> | |
| 20 <requirement type="package">bioconductor-edger</requirement> | |
| 21 <requirement type="package">bioconductor-limma</requirement> | |
| 22 <requirement type="package">r-batch</requirement> | |
| 23 <requirement type="package">r-locfit</requirement> | |
| 24 </requirements> | |
| 25 <stdio> | |
| 26 <!-- Any non-zero exit code is fatal: range "1:" covers the positive codes, range ":-1" covers the negative ones. --> | |
| 27 <exit_code range="1:" level="fatal" /> | |
| 28 <exit_code range=":-1" level="fatal" /> | |
| 29 </stdio> | |
| 30 <!-- Cheetah template for the command line. The script is called explicitly from the tool directory (the interpreter attribute is deprecated); stat_chosen and plot_chosen are only passed when their section is enabled. --> <command><![CDATA[ | |
| 31 Rscript '$__tool_directory__/summary_statistics_galaxy.R' | |
| 32 file_in '${file_in}' | |
| 33 NA_code '${NA_code}' | |
| 34 stat '${stat_cond.stat}' | |
| 35 #if $stat_cond.stat =="T": | |
| 36 stat_chosen '${stat_cond.stat_chosen}' | |
| 37 #end if | |
| 38 ploting '${ploting_cond.ploting}' | |
| 39 #if $ploting_cond.ploting =="T": | |
| 40 plot_chosen '${ploting_cond.plot_chosen}' | |
| 41 #end if | |
| 42 table_file '${table_file}' | |
| 43 graph_file '${graph_file}' | |
| 44 log_file '${log_file}' | |
| 45 ]]></command> | |
| 46 <inputs> <!-- Input table, missing-value label, and two Yes/No conditionals that reveal the statistic and plot checkboxes. --> | |
| 47 <param format="csv,tabular" name="file_in" type="data" label="Input File" /> | |
| 48 <param name="NA_code" size="30" type="text" value="NA" label="Label used for Missing values" /> | |
| 49 <conditional name="stat_cond"> | |
| 50 <param name="stat" type="select" help="Do you want to compute some basic statistics?" label="Statistics table"> | |
| 51 <option value="T">Yes</option> | |
| 52 <option value="F" selected="true">No</option> | |
| 53 </param> | |
| 54 <when value="T"> | |
| 55 <param name="stat_chosen" type="select" display="checkboxes" multiple="True" label="Chosen statistic(s)"> | |
| 56 <option value="mean">mean</option> | |
| 57 <option value="sd">sd</option> | |
| 58 <option value="variance">variance</option> | |
| 59 <option value="median">median</option> | |
| 60 <option value="quartile">quartile</option> | |
| 61 <option value="decile">decile</option> | |
| 62 <validator type="empty_field" message="Please choose at least one statistic representation" /> | |
| 63 </param> | |
| 64 </when> | |
| 65 </conditional> | |
| 66 <conditional name="ploting_cond"> | |
| 67 <param name="ploting" type="select" help="Do you want some standard plots?" label="Plots"> | |
| 68 <option value="T">Yes</option> | |
| 69 <option value="F" selected="true">No</option> | |
| 70 </param> | |
| 71 <when value="T"> | |
| 72 <param name="plot_chosen" type="select" help="" display="checkboxes" multiple="True" label="Chosen plot(s)"> | |
| 73 <option value="boxplot">boxplot</option> | |
| 74 <option value="histogram">histogram</option> | |
| 75 <option value="density">density</option> | |
| 76 <option value="pairsplot">pairsplot</option> | |
| 77 <option value="MAplot">MAplot</option> | |
| 78 <validator type="empty_field" message="Please choose at least one plotting representation." /> | |
| 79 </param> | |
| 80 </when> | |
| 81 </conditional> | |
| 82 </inputs> | |
| 83 <outputs> | |
| 84 <data name="log_file" format="html" label="Summary_statistics_log"/> <!-- always created --> | |
| 85 <data name="table_file" format="tabular" label="Summary_statistics_report.tsv"> <!-- created only when the statistics table is requested --> | |
| 86 <filter>stat_cond['stat'] == 'T'</filter> | |
| 87 </data> | |
| 88 <data name="graph_file" format="pdf" label="Summary_statistics_report.pdf"> <!-- created only when plots are requested --> | |
| 89 <filter>ploting_cond['ploting'] == 'T'</filter> | |
| 90 </data> | |
| 91 </outputs> | |
| 92 <tests> | |
| 93 <test> <!-- Enables every statistic and every plot on decathlon.tsv and checks all three outputs; the PDF is compared by size only. --> | |
| 94 <param name="file_in" value="decathlon.tsv"/> | |
| 95 <param name="NA_code" value="NA"/> | |
| 96 <conditional name="stat_cond"> | |
| 97 <param name="stat" value="T"/> | |
| 98 <param name="stat_chosen" value="mean,sd,variance,median,quartile,decile"/> | |
| 99 </conditional> | |
| 100 <conditional name="ploting_cond"> | |
| 101 <param name="ploting" value="T"/> | |
| 102 <param name="plot_chosen" value="boxplot,histogram,density,pairsplot,MAplot"/> | |
| 103 </conditional> | |
| 104 <output name="log_file" file="log_file"/> | |
| 105 <output name="table_file" file="table_file"/> | |
| 106 <output name="graph_file" file="graph_file" compare="sim_size"/> | |
| 107 </test> | |
| 108 </tests> | |
| 109 <help><![CDATA[ | |
| 110 | |
| 111 ================== | |
| 112 Summary statistics | |
| 113 ================== | |
| 114 | |
| 115 ----------- | |
| 116 Description | |
| 117 ----------- | |
| 118 | |
| 119 - This tool is part of a set of statistical tools made by members of the BIOS4BIOL group ("Normalization", "Summary statistics", "Hierarchical clustering" and "PCAFactoMineR"). | |
| 120 - Please use the Normalization module that comes with the suite before using this module. | |
| 121 | |
| 122 | |
| 123 What it does: | |
| 124 - This program produces simple descriptive statistics from a numerical table. Statistical measures are computed for each column in the table. | |
| 125 | |
| 126 ------ | |
| 127 | |
| 128 ----------- | |
| 129 Input files | |
| 130 ----------- | |
| 131 | |
| 132 +---------------------------+------------+ | |
| 133 | Parameter : num + label | Format | | |
| 134 +===========================+============+ | |
| 135 | 1 : input file | tabular | | |
| 136 +---------------------------+------------+ | |
| 137 | |
| 138 The input table should be a tabulation-separated file. If your dataset is available in your current history but can not be selected via the Input File drop-down list, it may be due to incorrect format. | |
| 139 Please first check that your data are actually tab-separated. If they are, you can change the Galaxy format of your table to "tabular" by editing its attributes ("Datatype" panel) [`See the schematic explanation here`_]. | |
| 140 | |
| 141 | |
| 142 .. _See the schematic explanation here: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/ChangeDatatype.pdf | |
| 143 | |
| 144 The first line should be a header (naming columns) and each line must begin with a row name, like the example below: | |
| 145 | |
| 146 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/input_count_file.png | |
| 147 | |
| 148 | |
| 149 | |
| 150 ---------- | |
| 151 Parameters | |
| 152 ---------- | |
| 153 | |
| 154 Label used for Missing values | |
| 155 | Missing value coding characters | |
| 156 | | |
| 157 | |
| 158 Statistics table | |
| 159 | if Yes, lets you choose the statistic(s) you want in your report | |
| 160 | | |
| 161 | |
| 162 Chosen statistic(s) | |
| 163 | select the statistics you want in your report (see "Available statistics and plots" below) | |
| 164 | | |
| 165 | |
| 166 Plots | |
| 167 | if Yes, lets you choose the plot(s) you want in your report | |
| 168 | | |
| 169 Chosen plot(s) | |
| 170 | select the plots you want in your report (see "Available statistics and plots" below) | |
| 171 | | |
| 172 | |
| 173 ------------------------------ | |
| 174 Available statistics and plots | |
| 175 ------------------------------ | |
| 176 | |
| 177 **Numerical statistical measures provided are the following ones:** | |
| 178 | |
| 179 | |
| 180 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_all.png | |
| 181 :width: 500 | |
| 182 | |
| 183 | |
| 184 **Available plots:** | |
| 185 | |
| 186 | |
| 187 * boxplot | |
| 188 | |
| 189 | |
| 190 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_boxplot.png | |
| 191 | |
| 192 (source : SAS documentation) | |
| 193 | |
| 194 * histogram | |
| 195 | |
| 196 | |
| 197 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_histo.png | |
| 198 :width: 285 | |
| 199 | |
| 200 In this example, about 45 values of the dataset are greater than 0 and lower than 0.5 | |
| 201 | |
| 202 * density | |
| 203 | |
| 204 | |
| 205 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_density.png | |
| 206 :width: 275 | |
| 207 | |
| 208 This option computes kernel density estimates (gaussian smoothing). | |
| 209 While a histogram displays the observed distribution of a numerical variable, a density plot shows the estimated distribution of the underlying continuous variable. | |
| 210 | |
| 211 * pairsplot | |
| 212 | |
| 213 | |
| 214 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_pairs.png | |
| 215 :width: 475 | |
| 216 | |
| 217 In this example, we have represented a pairs plot for a table with three columns, named | |
| 218 "a", "b" and "c". Each plot represents the values of a given column scaled on the x axis versus | |
| 219 the values of another column scaled on the y axis. | |
| 220 | |
| 221 * MAplot | |
| 222 | |
| 223 | |
| 224 .. image:: https://raw.githubusercontent.com/IFB-ElixirFr/GFLS/master/summary_statistics/static/images/descriptive_stat_maplot.png | |
| 225 :width: 275 | |
| 226 | |
| 227 Designed for genomic data (count data - only positive values accepted). | |
| 228 Each plot compares two samples. | |
| 229 The ordinate axis (M) represents the log ratios (binary logarithm) whereas the abscissa axis (A) corresponds to the means of log values. | |
| 230 | |
| 231 | |
| 232 | |
| 233 ------------ | |
| 234 Output files | |
| 235 ------------ | |
| 236 | |
| 237 Summary_statistics_report.tsv | |
| 238 | contains a table with all the requested statistics | |
| 239 | | |
| 240 | |
| 241 Summary_statistics_report.pdf | |
| 242 | contains all the requested graphics | |
| 243 | | |
| 244 | |
| 245 Summary_statistics_log | |
| 246 | | |
| 247 | | |
| 248 | |
| 249 ------ | |
| 250 | |
| 251 **Authors** Luc Jouneau (luc.jouneau@inra.fr), Mélanie Pétéra (melanie.petera@inra.fr), Sarah Maman (sarah.maman@inra.fr) and Valentin Marcon (valentin.marcon@inra.fr) | |
| 252 | |
| 253 Contact : support.sigenae@inra.fr | |
| 254 | |
| 255 E-learning available : Not yet. | |
| 256 | |
| 257 - Information : | |
| 258 | |
| 259 Tool coded in the R language: | |
| 260 | *R Core Team. R: A language and environment for statistical computing. R* | |
| 261 | *Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.* | |
| 262 | |
| 263 .. class:: infomark | |
| 264 | |
| 265 ------------- | |
| 266 Please cite : | |
| 267 ------------- | |
| 268 | |
| 269 - (Depending on the help provided you can cite us in acknowledgements, references or both.) | |
| 270 | |
| 271 Acknowledgements | |
| 272 | We wish to thank SIGENAE group and the statistical CATI BIOS4Biol group : Mélanie Pétéra, Sarah Maman, Luc Jouneau | |
| 273 | Re-packaging was provided by Valentin Marcon (INRA, Migale platform http://migale.jouy.inra.fr), as part of the IFB project 'Galaxy For Life Science' (http://www.france-bioinformatique.fr/fr) | |
| 274 | | |
| 275 | |
| 276 References | |
| 277 | SIGENAE [http://www.sigenae.org/] | |
| 278 | |
| 279 ]]></help> | |
| 280 </tool> |
