comparison CorrTable/Corr.xml @ 0:b22c453e4cf4 draft

Uploaded
author melpetera
date Thu, 11 Oct 2018 05:35:55 -0400
parents
children 29ec7e3afdd4
comparison
equal deleted inserted replaced
-1:000000000000 0:b22c453e4cf4
1 <tool id="corrtable" name="Between-table Correlation" version="0.0.0">
2 <description>Correlation table between two tables and graphic representation </description>
3 <requirements>
4 <requirement type="package" version="1.1_4">r-batch</requirement>
5 <requirement type="package" version="3.0.0">r-ggplot2</requirement>
6 <requirement type="package" version="1.4.3">r-reshape2</requirement>
7 </requirements>
8 <command interpreter="Rscript">
9
10 Corr_wrap.r
11
12 tab1_in "$tab1_in"
13 tab2_in "$tab2_in"
14
15 tab1_samples "$tab1_samples"
16 tab2_samples "$tab2_samples"
17
18 corr_method "$corr_method"
19
20 test_corr "${filter_section.testcorr_cond.test_corr}"
21 #if str($filter_section.testcorr_cond.test_corr) == 'yes' :
22 correct_multi "${filter_section.testcorr_cond.correct_multi}"
23 risk_alpha "${filter_section.testcorr_cond.risk_alpha}"
24 #end if
25
26 filter "${filter_section.filter_cond.filter}"
27 #if str($filter_section.filter_cond.filter) == 'yes' :
28 filters_choice "${filter_section.filter_cond.filtchoice_cond.filters_choice}"
29 #if str($filter_section.filter_cond.filtchoice_cond.filters_choice) == 'filters_0_thr' :
30 threshold "${filter_section.filter_cond.filtchoice_cond.threshold}"
31 #end if
32 #end if
33
34 reorder_var "$out_section.reorder_var"
35
36 color_heatmap "${out_section.heatmap_cond.color_heatmap}"
37 #if str($out_section.heatmap_cond.color_heatmap) == 'yes' :
38 type_classes "${out_section.heatmap_cond.typeclass_cond.type_classes}"
39 #if str($out_section.heatmap_cond.typeclass_cond.type_classes) == 'regular' :
40 reg_class_value "${out_section.heatmap_cond.typeclass_cond.reg_class_value}"
41 #elif str($out_section.heatmap_cond.typeclass_cond.type_classes) == 'irregular' :
42 irreg_class_vect "${out_section.heatmap_cond.typeclass_cond.irreg_class_vect}"
43 #end if
44 #end if
45
46 tabcorr_out "$tabcorr_out"
47 heatmap_out "$heatmap_out"
48
49 </command>
50
51 <inputs>
52
53 <param name="tab1_in" type="data" label="Table 1 file" help="The two input tables must have the same sample IDs" format="tabular" />
54 <param name="tab1_samples" label="Where are the samples in table 1?" type="select" display="radio" help="">
55 <option value="row">Row</option>
56 <option value="column">Column</option>
57 </param>
58
59 <param name="tab2_in" type="data" label="Table 2 file" help="The two input tables must have the same sample IDs" format="tabular" />
60 <param name="tab2_samples" label="Where are the samples in table 2?" type="select" display="radio" help="">
61 <option value="row">Row</option>
62 <option value="column">Column</option>
63 </param>
64
65 <param name="corr_method" label="Method to calculate the correlation coefficients" type="select" help="">
66 <option value="pearson">Pearson</option>
67 <option value="spearman">Spearman</option>
68 <option value="kendall">Kendall</option>
69 </param>
70
71 <section name="filter_section" title="Filtering options" expanded="False">
72 <conditional name="testcorr_cond">
73 <param name="test_corr" label="Significance test for the correlation coefficients" type="select" display="radio" help="">
74 <option value="no">No</option>
75 <option value="yes">Yes</option>
76 </param>
77 <when value="yes">
78 <param name="correct_multi" label="Method for multiple testing correction" type="select" help="">
79 <option value="none">none</option>
80 <option value="fdr">fdr</option>
81 <option value="BH">BH</option>
82 <option value="bonferroni">bonferroni</option>
83 <option value="BY">BY</option>
84 <option value="hochberg">hochberg</option>
85 <option value="holm">holm</option>
86 <option value="hommel">hommel</option>
87 </param>
88 <param name="risk_alpha" label="(Corrected) p-value significance threshold" type="float" value="0.05" help="Must be between 0 and 1" />
89 </when>
90 <when value="no">
91 </when>
92 </conditional>
93
94 <conditional name="filter_cond">
95 <param name="filter" label="Filter the correlation table" type="select" display="radio" help="">
96 <option value="no">No</option>
97 <option value="yes">Yes</option>
98 </param>
99
100 <when value ="yes">
101 <conditional name="filtchoice_cond">
102 <param name="filters_choice" label="Do you want to use only zero filter or combine it with the threshold filter?" type="select" display="radio" help="The zero filter removes variables which have all their correlation coefficients equal to 0. The threshold filter removes variables which have all their correlation coefficients, in absolute value, strictly below a threshold.">
103 <option value="filter_0">Only zero filter</option>
104 <option value="filters_0_thr">Threshold filter</option>
105 </param>
106
107 <when value="filters_0_thr">
108 <param name="threshold" label="Threshold" type="float" value="" help="Must be between 0 and 1" />
109 </when>
110
111 <when value="filter_0">
112 </when>
113 </conditional>
114 </when>
115
116 <when value="no">
117 </when>
118 </conditional>
119 </section>
120
121 <section name="out_section" title="Graphical outputs" expanded="False">
122 <param name="reorder_var" label="Reorder variables (using Hierarchical Cluster Analysis)" type="select" display="radio" help="">
123 <option value="no">No</option>
124 <option value="yes">Yes</option>
125 </param>
126
127 <conditional name="heatmap_cond">
128 <param name="color_heatmap" label="Colored correlation table strategy" type="select" display="radio" help="Standard corresponds to a scale with a smooth gradient between three colors: red, white and green (continuous case). Customized creates classes for the correlation coefficients - the scale has discrete values.">
129 <option value="no">Standard</option>
130 <option value="yes">Customized</option>
131 </param>
132
133 <when value="yes">
134 <conditional name="typeclass_cond">
135 <param name="type_classes" label="Choose the type of classes" type="select" display="radio" help="Regular means the classes have the same size. Irregular means it is possible to choose any intervals." >
136 <option value="regular">Regular classes</option>
137 <option value="irregular">Irregular classes</option>
138 </param>
139
140 <when value="regular">
141 <param name="reg_class_value" label="Class size" type="float" value="" help="Must be between 0 and 1" />
142 </when>
143
144 <when value="irregular">
145 <param name="irreg_class_vect" label="Vector with values for classes" type="text" value="" help="The vector must be of the following form: (value1,value2,value3,..). The values must be between -1 and 1 not included. For example: (-0.8,-0.5,-0.4,0,0.4,0.5,0.8)." />
146 </when>
147 </conditional>
148 </when>
149
150 <when value ="no">
151 </when>
152
153 </conditional>
154 </section>
155
156 </inputs>
157
158 <outputs>
159 <data name="tabcorr_out" label="CorrTable" format="tabular" />
160 <data name="heatmap_out" label="CT_plot" format="pdf" />
161 </outputs>
162
163 <help>
164
165 .. class:: infomark
166
167 **Author:**
168 Ophelie Barbet for original code (PFEM - INRA)
169 Maintainer: Melanie Petera (PFEM - INRA - MetaboHUB)
170
171 ---------------------------------------------------
172
173 =========================
174 Between-table Correlation
175 =========================
176
177 -----------
178 Description
179 -----------
180
181 | Visualises the links between two data tables by building a correlation table between the variables of these tables, and a heatmap representing the correlation table colored according to the coefficients.
182 |
183
184 -----------
185 Input files
186 -----------
187
188 +----------------------------+------------+
189 | Parameter | Format |
190 +============================+============+
191 | 1 : Table 1 file | tabular |
192 +----------------------------+------------+
193 | 2 : Table 2 file | tabular |
194 +----------------------------+------------+
195
196 |
197 | The two input tables must have the same sample IDs.
198 |
199
200 ----------
201 Parameters
202 ----------
203
204 Positions of samples in table 1 and table 2
205 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
206 | Essential to correctly calculate the correlations.
207 |
208
209 Method for calculating the correlation coefficients
210 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
211 | - 'Pearson': Measures the intensity of the linear association between two continuous variables.
212 | - The 'Spearman' and 'Kendall' methods are explained in the R documentation of the 'cor' function as follows: " Kendall's tau or Spearman's rho statistic is used to estimate a rank-based measure of association. These are more robust and have been recommended if the data do not necessarily come from a bivariate normal distribution.".
213 |
214
215 Significance test for the correlation coefficients
216 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
217 | This test is performed on each correlation coefficient, with the following hypotheses:
218 | H0: The correlation coefficient is not significantly different from zero.
219 | H1: The correlation coefficient is significantly different from zero.
220 |
221 | Coefficients whose null hypothesis (H0) is not rejected are replaced by zeros in the correlation table.
222 |
223
224 | **Method for multiple testing correction (only if significance test is 'Yes'):**
225 | The 7 methods implemented in the 'p.adjust' R function are available and documented as follows:
226 | "The adjustment methods include the Bonferroni correction ("bonferroni") in which the p-values are multiplied by the number of comparisons. Less conservative corrections are also included by Holm (1979) ("holm"), Hochberg (1988) ("hochberg"), Hommel (1988) ("hommel"), Benjamini and Hochberg (1995) ("BH" or its alias "fdr"), and Benjamini and Yekutieli (2001) ("BY"), respectively. A pass-through option ("none") is also included. The set of methods are contained in the p.adjust.methods vector for the benefit of methods that need to have the method as an option and pass it on to p.adjust. The first four methods are designed to give strong control of the family-wise error rate. There seems no reason to use the unmodified Bonferroni correction because it is dominated by Holm's method, which is also valid under arbitrary assumptions. Hochberg's and Hommel's methods are valid when the hypothesis tests are independent or when they are non-negatively associated (Sarkar, 1998; Sarkar and Chang, 1997). Hommel's method is more powerful than Hochberg's, but the difference is usually small and the Hochberg p-values are faster to compute. The "BH" (aka "fdr") and "BY" method of Benjamini, Hochberg, and Yekutieli control the false discovery rate, the expected proportion of false discoveries amongst the rejected hypotheses. The false discovery rate is a less stringent condition than the family-wise error rate, so these methods are more powerful than the others."
227 |
228
229 | **(Corrected) p-value significance threshold (only if significance test is 'Yes'):**
230 | A value between 0 and 1, usually 0.05.
231 |
232
233 Filter the correlation table
234 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
235 | Reduces the size of the correlation table by keeping only the variables considered relevant.
236 |
237
238 | **Choose the filters to apply (only if filter is 'Yes'):**
239 | - 'Only zero filter': Remove variables with all their correlation coefficients equal to zero.
240 | - 'Threshold filter': Remove variables with all their correlation coefficients (in absolute value) strictly below a threshold.
241
242 | *Choose a threshold (only if the threshold filter is used):* A value between 0 and 1.
243 |
244
245 Reorder variables using Hierarchical Cluster Analysis (HCA)
246 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
247 | Allows the most linked variables to be close in the correlation table.
248 | An HCA is performed on each input table, with:
249 | - 1 - correlation coefficient, as distance
250 | - Ward as aggregation method.
251 |
252
253
254 Colored correlation table strategy
255 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
256 | Creates a colored correlation table. Variables of table 1 and variables of table 2 are related using colored rectangles.
257 | About the colors, the negative correlations are in red, more or less intense according to their position between -1 and 0, and the positive correlations in green, more or less intense according to their position between 0 and 1. The coefficients equal to 0 are in white.
258 | - 'Standard': the graphical representation has a scale with a smooth gradient between three colors: red, white and green.
259 | - 'Customized': the colored correlation table has coefficient classes. It is possible to create regular or irregular classes. The scale is discrete.
260 |
261
262 | **Choose the type of classes (only if colored correlation table strategy is 'Customized'):**
263
264 | - 'Regular': classes are all (or almost) the same size.
265 | To build these intervals, we start from 1 and move towards 0 in steps of the size chosen by the user, and we do the same symmetrically from -1 towards 0. If the last step does not fall exactly on 0, we create a class between this last value and 0, smaller in size than the others. Note that 0 is a class on its own, which is assigned the color white in the heatmap.
266
267 | *Size of classes (if regular classes):* A value between 0 and 1.
268
269 | Example: if the size is 0.4, classes are [-1;-0.6], ]-0.6;-0.2], ]-0.2;0[, 0, ]0;0.2], ]0.2;0.6] and ]0.6;1].
270 |
271
272 | - 'Irregular': classes have variable lengths.
273 | It is possible to do as many classes as you want, and of any size. There is not necessarily symmetry between -1 and 0, and 0 and 1. You can choose to have a white class with only 0, or an interval which contains the value 0.
274
275 | *Vector with values for classes (if irregular classes):* The values in the vector must be strictly between -1 and 1 (bounds excluded), and in ascending order. The vector must have the form (value1,value2,...). If the vector contains 0, then this value becomes a class on its own; otherwise the white class is the one which contains 0.
276
277 | Example: if the vector is (-0.8,-0.5,-0.4,0,0.4,0.5,0.8), the classes are [-1;-0.8], ]-0.8;-0.5], ]-0.5;-0.4], ]-0.4;0[, 0, ]0;0.4], ]0.4;0.5], ]0.5;0.8] and ]0.8;1].
278 |
279
280
281 ------------
282 Output files
283 ------------
284
285 Correlation Table
286 ^^^^^^^^^^^^^^^^^
287 | Tabular output
288 | Correlation table between the variables of the two input tables
289 |
290
291 Heatmap (colored correlation table)
292 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
293 | Pdf output
294 | Colored representation of the correlation table. The coefficients are replaced by colors. A coefficient close to -1 is red, close to 0 is white, and close to 1 is green.
295 |
296
297
298 </help>
299
300 </tool>