Mercurial > repos > ecology > ecology_link_between_var
diff link_between_var.xml @ 0:c7dd4706f982 draft
"planemo upload for repository https://github.com/Marie59/Data_explo_tools commit 2f883743403105d9cac6d267496d985100da3958"
author | ecology |
---|---|
date | Tue, 27 Jul 2021 16:55:49 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/link_between_var.xml Tue Jul 27 16:55:49 2021 +0000 @@ -0,0 +1,285 @@ +<tool id="ecology_link_between_var" name="Variables exploration" version="@VERSION@" profile="20.01"> + <description>Shows interaction, correlation, colinearity, produces a PCA and computes VIF for biodiversity abundance data</description> + <macros> + <import>macro.xml</import> + </macros> + <expand macro="Explo_requirements"> + <requirement type="package" version="4.1">r-base</requirement> + <requirement type="package" version="1.1.1">r-cowplot</requirement> + <requirement type="package" version="2.1.2">r-ggally</requirement> + <requirement type="package" version="3.0_11">r-car</requirement> + <requirement type="package" version="1.0.7">r-dplyr</requirement> + <requirement type="package" version="0.1.3">r-ggcorrplot</requirement> + <requirement type="package" version="2.4">r-factominer</requirement> + <requirement type="package" version="1.0.7">r-factoextra</requirement> + </expand> + <command detect_errors="exit_code"><![CDATA[ + Rscript + '$__tool_directory__/graph_link_var.r' + '$input' + '$colnames' + #if $method.type == 'collinearity': + 'TRUE' + 'FALSE' + 'FALSE' + 'FALSE' + 'FALSE' + '$method.species' + '$method.columns' + '' + '' + '' + #elif $method.type == 'vif': + 'FALSE' + 'TRUE' + 'FALSE' + 'FALSE' + 'FALSE' + '' + '$method.columns' + '' + '' + '' + #elif $method.type == 'pca': + 'FALSE' + 'FALSE' + 'TRUE' + 'FALSE' + 'FALSE' + '' + '$method.columns' + '' + '' + '' + #elif $method.type == 'interr': + 'FALSE' + 'FALSE' + 'FALSE' + 'TRUE' + 'FALSE' + '$method.species' + '' + '$method.variable' + '$method.variable2' + '$method.variable3' + #else: + 'FALSE' + 'FALSE' + 'FALSE' + 'FALSE' + 'TRUE' + '' + '' + '$method.variable' + '' + '' + #end if + ]]> + </command> + <inputs> + <expand macro="explo_input"/> + <conditional name="method"> + <param name="type" type="select" label="Variables links exploration"> + <option value="collinearity">Collinearity between selected numerical variables for each species</option> + <option value="vif">Variance inflation factor (vif) on selected numerical variables</option> + <option value="pca">Principal component analysis (pca) on selected numerical variables</option> + <option value="interr">Interactions between 2 selected numerical variables</option> + <option value="autocorr">Autocorrelation of one selected numerical variable</option> + </param> + <when value="collinearity"> + <param name="species" type="data_column" data_ref="input" numerical="false" label="Select column containing species" use_header_names="true"/> + <param name="columns" type="data_column" data_ref="input" numerical="true" multiple="true" label="Select columns containing numerical values" help="Select at least two columns" use_header_names="true"/> + </when> + <when value="vif"> + <param name="columns" type="data_column" data_ref="input" numerical="true" multiple="true" label="Select columns containing numerical values" use_header_names="true"/> + </when> + <when value="pca"> + <param name="columns" type="data_column" data_ref="input" numerical="true" multiple="true" label="Select columns containing numerical values" use_header_names="true"/> + </when> + <when value="interr"> + <param name="variable" type="data_column" data_ref="input" numerical="true" label="Select column containing numerical values for x-axis" use_header_names="true"/> + <param name="variable2" type="data_column" data_ref="input" numerical="true" label="Select column containing numerical values for y-axis" use_header_names="true"/> + <param name="species" type="data_column" data_ref="input" numerical="false" label="Select column containing species" help="This parameter allows you to divide your scatterplot according to species" use_header_names="true"/> + <param name="variable3" type="data_column" data_ref="input" label="Select column" help="This parameter allows you to divide your scatterplot once more" use_header_names="true"/> + </when> + <when value="autocorr"> + <param name="variable" type="data_column" data_ref="input" numerical="true" label="Select column containing numerical values" use_header_names="true"/> + </when> + </conditional> + </inputs> + <outputs> + <data name="output_coli" from_work_dir="Data.txt" format="txt" label="Collinearity analysis - Missing species"> + <expand macro="explo_filter_colli"/> + </data> + <data name="output_acp" from_work_dir="valeurs.txt" format="txt" label="PCA (Principal Component Analysis) - Eigen values"> + <expand macro="explo_filter_pca"/> + </data> + <data name="output_vif" from_work_dir="vif.tabular" format="tabular" label="Your VIF tabular"> + <expand macro="explo_filter_vif"/> + </data> + <data name="output_corr" from_work_dir="corr.tabular" format="tabular" label="Correlation matrix"> + <expand macro="explo_filter_vif"/> + </data> + <data name="output_interr" from_work_dir="Species.txt" format="txt" label="Interactions analysis - Species in data"> + <expand macro="explo_filter_interr"/> + </data> + <data name="output_autocorr" from_work_dir="acf.txt" format="txt" label="Autocorrelation analysis - ACF table"> + <expand macro="explo_filter_autocorr"/> + </data> + <collection type="list" name="plots"> + <discover_datasets pattern="(?P<designation>.+)\.png" visible="false" format="png"/> + <filter>method['type'] != 'vif'</filter> + </collection> + </outputs> + <tests> + <test> + <param name="input" value="Reel_life_survey_fish_modif2.tabular"/> + <param name="colnames" value="true"/> + <conditional name="method"> + <param name="type" value="collinearity"/> + <param name="species" value="15"/> + <param name="columns" value="12,17,18"/> + </conditional> + <output name="output_coli" value="Missing_species.txt"/> + <output_collection name="plots" type="list" count="3"> + <element name="collinarity_of_Blenniidae" ftype="png"> + <assert_contents> + <has_text text="PNG"/> + </assert_contents> + </element> + <element name="collinarity_of_Gobiidae" ftype="png"> + <assert_contents> + <has_text text="PNG"/> + </assert_contents> + </element> + <element name="collinarity_of_Tripterygiidae" ftype="png"> + <assert_contents> + <has_text text="PNG"/> + </assert_contents> + </element> + </output_collection> + </test> + </tests> + <expand macro="topic"/> + <help><![CDATA[ +================================= +Determine links between variables +================================= + +- Show the collinearity among the covariates +- Plot a Pincipal Component Analysis (PCA) +- Compute the Variance Inflation Factor (VIF) +- Show if there is auto-correlation +- Show the interactions between variables + +**Collinearity between selected numerical variables for each species** + +This tool shows if multiple numerical variables shows colinearity or not between one another. + +Input description : + +A tabular file with observation data. Must at least contain three columns, species and multiple numerical variable. + ++-------------+------------+---------------+ +| number1 | number2 | species.code | ++=============+============+===============+ +| 2 | 4 | speciesID | ++-------------+------------+---------------+ +| ... | ... | ... | ++-------------+------------+---------------+ + +Output description : + +A png file with one plot containing multiple correlation plots and the correlation values between each variables. + +Warning : When there are more than 3 species in the data this tool shows one plot for each species. + + +**Variance Inflation Factor (VIF) on selected numerical variables** + +This tool calculates the correlation matrix and the Variance Inflation Factor between each pair of the selected numerical variables. + +Input description: + +A tabular file with observation data. Must at least contain two columns of numerical variables. + + +Output description : + +Two tabulars : + +- One with VIF values for each pair, it measures how much the behavior (variance) of an independent variable is influenced, or inflated, by its interaction/correlation with the other independent variables. A large VIF on an independent variable indicates a highly collinear relationship to the other variable that should be considered or adjusted for in the structure of the model and selection of independent variable. + +- One containing the correlation matrix. + + +**Principal Component Analysis (PCA) on selected numerical variables** + +This tool computes a Principal Component Analysis. + +Input description: + +A tabular file with observation data with numerical variables. + +Output description: + +Two png files with plots. The first one is showing the PCA plot : + +- The positively correlated variables are grouped together. + +- The negatively ones are on opposite sides of the plot's origin. + +- the distance between the variables and the origin calculates the quality of the representation of the variables. The variables far from the origin are well represented by the PCA. + +The quality of the representation is also calculated and represented with the cos2 determined with colors : + +- A high cos2 indicates a good representation of the variable. In this case, the variable is near the circumference of the correlation circle. + +- A low cos2 indicates that the variable is not perfectly represented. In this case, the variable is near the center of the circle. + +The second plot is about the quality of the PCA, it represents the correlation between dimensions of the PCA and the selected variables. + +A text file containing eigen values of the PCA. + + +**Interactions between two selected numerical variables** + +This tool represents the interactions between variables through multiple scatterplots. + +Input description: + +A tabular file with observation data. Must at least contain four columns two numerical variables, any other variables and species. + ++----------+-----------+--------------+------------+ +| number1 | variable | species.code | number2 | ++==========+===========+==============+============+ +| 2 | var | speciesID | 4 | ++----------+-----------+--------------+------------+ +| ... | ... | ... | ... | ++----------+-----------+--------------+------------+ + +Output description: + +PNG files (one per species) with plots showing the interactions between the two numerical variables for each separation factor. + +A text file with a recap of the species column used for the analysis. + + +**Autocorrelation of one selected numerical variable** + +This tool computes the ACF (Auto-Correlation Function) and represents the autocorrelation of a numerical variable. + +Input description: + +A tabular file with observation data. Must at least contain one column with a numerical variable. + + +Output description: + +A png file with one plot showing the autocorrelation for a variable. If the bars of the histogram are strictly confined between the dashed lines (representing 95% confidence interval without white noise), there is auto-correlation. + +A text file containing the ACF values. + + ]]></help> + <expand macro="explo_bibref"/> +</tool>