comparison link_between_var.xml @ 0:c7dd4706f982 draft

"planemo upload for repository https://github.com/Marie59/Data_explo_tools commit 2f883743403105d9cac6d267496d985100da3958"
author ecology
date Tue, 27 Jul 2021 16:55:49 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c7dd4706f982
1 <tool id="ecology_link_between_var" name="Variables exploration" version="@VERSION@" profile="20.01">
2 <description>Shows interaction, correlation, colinearity, produces a PCA and computes VIF for biodiversity abundance data</description>
3 <macros>
4 <import>macro.xml</import>
5 </macros>
<!-- Package requirements for this tool: r-base plus the R packages listed
     below, each pinned to an explicit version. They are passed as the body
     of the Explo_requirements macro imported from macro.xml.
     NOTE(review): how the macro wraps these entries is defined in macro.xml,
     which is not visible here; confirm there before moving them. -->
6 <expand macro="Explo_requirements">
7 <requirement type="package" version="4.1">r-base</requirement>
8 <requirement type="package" version="1.1.1">r-cowplot</requirement>
9 <requirement type="package" version="2.1.2">r-ggally</requirement>
10 <requirement type="package" version="3.0_11">r-car</requirement>
11 <requirement type="package" version="1.0.7">r-dplyr</requirement>
12 <requirement type="package" version="0.1.3">r-ggcorrplot</requirement>
13 <requirement type="package" version="2.4">r-factominer</requirement>
14 <requirement type="package" version="1.0.7">r-factoextra</requirement>
15 </expand>
<!-- Builds the Rscript command line that runs graph_link_var.r from the tool
     directory. Every branch of the conditional emits the same number of
     positional arguments, so each value always lands in the same slot:
       1, 2    the input dataset and the colnames value
               (presumably both defined by the explo_input macro; confirm)
       3 to 7  five 'TRUE'/'FALSE' flags, exactly one of which is 'TRUE';
               their order is collinearity, vif, pca, interr, then the
               fallback branch (the only remaining select value is autocorr)
       8       method.species    (collinearity and interr branches)
       9       method.columns    (collinearity, vif and pca branches)
       10      method.variable   (interr and fallback branches)
       11, 12  method.variable2 and method.variable3 (interr branch only)
     A slot that the selected branch does not use is passed as an empty
     string. Keep this comment outside the CDATA section: text inside it
     becomes part of the command template. -->
16 <command detect_errors="exit_code"><![CDATA[
17 Rscript
18 '$__tool_directory__/graph_link_var.r'
19 '$input'
20 '$colnames'
21 #if $method.type == 'collinearity':
22 'TRUE'
23 'FALSE'
24 'FALSE'
25 'FALSE'
26 'FALSE'
27 '$method.species'
28 '$method.columns'
29 ''
30 ''
31 ''
32 #elif $method.type == 'vif':
33 'FALSE'
34 'TRUE'
35 'FALSE'
36 'FALSE'
37 'FALSE'
38 ''
39 '$method.columns'
40 ''
41 ''
42 ''
43 #elif $method.type == 'pca':
44 'FALSE'
45 'FALSE'
46 'TRUE'
47 'FALSE'
48 'FALSE'
49 ''
50 '$method.columns'
51 ''
52 ''
53 ''
54 #elif $method.type == 'interr':
55 'FALSE'
56 'FALSE'
57 'FALSE'
58 'TRUE'
59 'FALSE'
60 '$method.species'
61 ''
62 '$method.variable'
63 '$method.variable2'
64 '$method.variable3'
65 #else:
66 'FALSE'
67 'FALSE'
68 'FALSE'
69 'FALSE'
70 'TRUE'
71 ''
72 ''
73 '$method.variable'
74 ''
75 ''
76 #end if
77 ]]>
78 </command>
<!-- Tool form. The explo_input macro is expanded first; it presumably
     declares the 'input' dataset that every data_ref below points at and the
     'colnames' value used by the command (confirm in macro.xml). The 'method'
     conditional then offers one set of column selectors per analysis type.
     All selectors are data_column parameters with use_header_names="true". -->
79 <inputs>
80 <expand macro="explo_input"/>
81 <conditional name="method">
82 <param name="type" type="select" label="Variables links exploration">
83 <option value="collinearity">Collinearity between selected numerical variables for each species</option>
84 <option value="vif">Variance inflation factor (vif) on selected numerical variables</option>
85 <option value="pca">Principal component analysis (pca) on selected numerical variables</option>
86 <option value="interr">Interactions between 2 selected numerical variables</option>
87 <option value="autocorr">Autocorrelation of one selected numerical variable</option>
88 </param>
<!-- collinearity: one species column plus several numerical columns. -->
89 <when value="collinearity">
90 <param name="species" type="data_column" data_ref="input" numerical="false" label="Select column containing species" use_header_names="true"/>
91 <param name="columns" type="data_column" data_ref="input" numerical="true" multiple="true" label="Select columns containing numerical values" help="Select at least two columns" use_header_names="true"/>
92 </when>
<!-- vif and pca: the same multi-select of numerical columns, no species. -->
93 <when value="vif">
94 <param name="columns" type="data_column" data_ref="input" numerical="true" multiple="true" label="Select columns containing numerical values" use_header_names="true"/>
95 </when>
96 <when value="pca">
97 <param name="columns" type="data_column" data_ref="input" numerical="true" multiple="true" label="Select columns containing numerical values" use_header_names="true"/>
98 </when>
<!-- interr: x-axis and y-axis numerical columns, a species column, and
     variable3 as an extra splitting column. variable3 is the only selector
     in this branch that sets no numerical attribute. -->
99 <when value="interr">
100 <param name="variable" type="data_column" data_ref="input" numerical="true" label="Select column containing numerical values for x-axis" use_header_names="true"/>
101 <param name="variable2" type="data_column" data_ref="input" numerical="true" label="Select column containing numerical values for y-axis" use_header_names="true"/>
102 <param name="species" type="data_column" data_ref="input" numerical="false" label="Select column containing species" help="This parameter allows you to divide your scatterplot according to species" use_header_names="true"/>
103 <param name="variable3" type="data_column" data_ref="input" label="Select column" help="This parameter allows you to divide your scatterplot once more" use_header_names="true"/>
104 </when>
<!-- autocorr: a single numerical column. -->
105 <when value="autocorr">
106 <param name="variable" type="data_column" data_ref="input" numerical="true" label="Select column containing numerical values" use_header_names="true"/>
107 </when>
108 </conditional>
109 </inputs>
<!-- Declared outputs. Each data element is collected from a fixed file name
     in the job working directory (from_work_dir) and is wrapped in a filter
     macro from macro.xml.
     NOTE(review): the filter macros presumably restrict each output to its
     matching method type; confirm in macro.xml. output_vif and output_corr
     share the same macro, explo_filter_vif.
     The 'plots' list collection gathers every file whose name ends in .png,
     using the part before the extension as the element designation; it is
     filtered out when the method type is 'vif'. -->
110 <outputs>
111 <data name="output_coli" from_work_dir="Data.txt" format="txt" label="Collinearity analysis - Missing species">
112 <expand macro="explo_filter_colli"/>
113 </data>
114 <data name="output_acp" from_work_dir="valeurs.txt" format="txt" label="PCA (Principal Component Analysis) - Eigen values">
115 <expand macro="explo_filter_pca"/>
116 </data>
117 <data name="output_vif" from_work_dir="vif.tabular" format="tabular" label="Your VIF tabular">
118 <expand macro="explo_filter_vif"/>
119 </data>
120 <data name="output_corr" from_work_dir="corr.tabular" format="tabular" label="Correlation matrix">
121 <expand macro="explo_filter_vif"/>
122 </data>
123 <data name="output_interr" from_work_dir="Species.txt" format="txt" label="Interactions analysis - Species in data">
124 <expand macro="explo_filter_interr"/>
125 </data>
126 <data name="output_autocorr" from_work_dir="acf.txt" format="txt" label="Autocorrelation analysis - ACF table">
127 <expand macro="explo_filter_autocorr"/>
128 </data>
129 <collection type="list" name="plots">
130 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.png" visible="false" format="png"/>
131 <filter>method['type'] != 'vif'</filter>
132 </collection>
133 </outputs>
<!-- Single functional test, covering the collinearity branch only: species
     is column 15 and the numerical columns are 12, 17 and 18. It expects
     output_coli to match Missing_species.txt and the 'plots' collection to
     hold exactly three png elements, each containing the text "PNG".
     NOTE(review): the vif, pca, interr and autocorr branches have no test
     in this section.
     NOTE(review): the element names are spelled "collinarity"; they
     presumably mirror the file names written by the R script, so check the
     script before changing the spelling here. -->
134 <tests>
135 <test>
136 <param name="input" value="Reel_life_survey_fish_modif2.tabular"/>
137 <param name="colnames" value="true"/>
138 <conditional name="method">
139 <param name="type" value="collinearity"/>
140 <param name="species" value="15"/>
141 <param name="columns" value="12,17,18"/>
142 </conditional>
143 <output name="output_coli" value="Missing_species.txt"/>
144 <output_collection name="plots" type="list" count="3">
145 <element name="collinarity_of_Blenniidae" ftype="png">
146 <assert_contents>
147 <has_text text="PNG"/>
148 </assert_contents>
149 </element>
150 <element name="collinarity_of_Gobiidae" ftype="png">
151 <assert_contents>
152 <has_text text="PNG"/>
153 </assert_contents>
154 </element>
155 <element name="collinarity_of_Tripterygiidae" ftype="png">
156 <assert_contents>
157 <has_text text="PNG"/>
158 </assert_contents>
159 </element>
160 </output_collection>
161 </test>
162 </tests>
163 <expand macro="topic"/>
164 <help><![CDATA[
165 =================================
166 Determine links between variables
167 =================================
168
169 - Show the collinearity among the covariates
170 - Plot a Principal Component Analysis (PCA)
171 - Compute the Variance Inflation Factor (VIF)
172 - Show if there is auto-correlation
173 - Show the interactions between variables
174
175 **Collinearity between selected numerical variables for each species**
176
177 This tool shows whether multiple numerical variables are collinear with one another.
178
179 Input description :
180
181 A tabular file with observation data. It must contain at least three columns: one species column and at least two numerical variables.
182
183 +-------------+------------+---------------+
184 | number1 | number2 | species.code |
185 +=============+============+===============+
186 | 2 | 4 | speciesID |
187 +-------------+------------+---------------+
188 | ... | ... | ... |
189 +-------------+------------+---------------+
190
191 Output description :
192
193 A png file with one plot containing multiple correlation plots and the correlation values between each pair of variables.
194
195 Warning : When there are more than 3 species in the data this tool shows one plot for each species.
196
197
198 **Variance Inflation Factor (VIF) on selected numerical variables**
199
200 This tool calculates the correlation matrix and the Variance Inflation Factor between each pair of the selected numerical variables.
201
202 Input description:
203
204 A tabular file with observation data. Must at least contain two columns of numerical variables.
205
206
207 Output description :
208
209 Two tabulars :
210
211 - One with VIF values for each pair. The VIF measures how much the behavior (variance) of an independent variable is influenced, or inflated, by its interaction/correlation with the other independent variables. A large VIF on an independent variable indicates a highly collinear relationship to the other variables that should be considered or adjusted for in the structure of the model and the selection of independent variables.
212
213 - One containing the correlation matrix.
214
215
216 **Principal Component Analysis (PCA) on selected numerical variables**
217
218 This tool computes a Principal Component Analysis.
219
220 Input description:
221
222 A tabular file with observation data with numerical variables.
223
224 Output description:
225
226 Two png files with plots. The first one is showing the PCA plot :
227
228 - The positively correlated variables are grouped together.
229
230 - The negatively correlated ones are on opposite sides of the plot's origin.
231
232 - The distance between the variables and the origin measures the quality of the representation of the variables. The variables far from the origin are well represented by the PCA.
233
234 The quality of the representation is also calculated and represented with the cos2 determined with colors :
235
236 - A high cos2 indicates a good representation of the variable. In this case, the variable is near the circumference of the correlation circle.
237
238 - A low cos2 indicates that the variable is not perfectly represented. In this case, the variable is near the center of the circle.
239
240 The second plot is about the quality of the PCA, it represents the correlation between dimensions of the PCA and the selected variables.
241
242 A text file containing eigen values of the PCA.
243
244
245 **Interactions between two selected numerical variables**
246
247 This tool represents the interactions between variables through multiple scatterplots.
248
249 Input description:
250
251 A tabular file with observation data. It must contain at least four columns: two numerical variables, one other variable and a species column.
252
253 +----------+-----------+--------------+------------+
254 | number1 | variable | species.code | number2 |
255 +==========+===========+==============+============+
256 | 2 | var | speciesID | 4 |
257 +----------+-----------+--------------+------------+
258 | ... | ... | ... | ... |
259 +----------+-----------+--------------+------------+
260
261 Output description:
262
263 PNG files (one per species) with plots showing the interactions between the two numerical variables for each separation factor.
264
265 A text file with a recap of the species column used for the analysis.
266
267
268 **Autocorrelation of one selected numerical variable**
269
270 This tool computes the ACF (Auto-Correlation Function) and represents the autocorrelation of a numerical variable.
271
272 Input description:
273
274 A tabular file with observation data. Must at least contain one column with a numerical variable.
275
276
277 Output description:
278
279 A png file with one plot showing the autocorrelation for a variable. If the bars of the histogram stay strictly between the dashed lines (representing the 95% confidence interval expected for white noise), there is no significant auto-correlation; bars extending beyond the dashed lines indicate auto-correlation.
280
281 A text file containing the ACF values.
282
283 ]]></help>
284 <expand macro="explo_bibref"/>
285 </tool>