Mercurial > repos > workflow4metabolomics > correlation_analysis
diff abims_correlation_analysis.xml @ 0:58997c28b268 draft default tip
"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics/blob/master/tools/correlation_analysis/ commit 35a01e4ef59a91f43d0b1de1d08db29dcc7aae1e"
author | workflow4metabolomics |
---|---|
date | Tue, 19 Jan 2021 16:41:47 +0000 |
parents | |
children |
line wrap: on
line diff
<!--
    Galaxy tool wrapper "Metabolites Correlation Analysis" (abims_correlation_analysis.xml).

    Reconstructed from the Mercurial changeset view of revision 0:58997c28b268
    (Tue Jan 19 16:41:47 2021 +0000), in which the whole file was added (322 new lines).

    The wrapper calls correlation_analysis.r from the tool directory with "name value"
    argument pairs and supports three mutually exclusive modes, chosen through the
    nested conditionals "cond_input_type" and "cond_function":
      1. W4M files, "sort_only"      : sorting 1, corrdel 0, matrix_corr 0
      2. W4M files, "sort_and_corr"  : sorting 1, corrdel 1, matrix_corr 0
      3. user table file             : sorting 0, corrdel 0, matrix_corr 1
-->
<tool id="correlation_analysis" name="Metabolites Correlation Analysis" version="1.0.1+galaxy0">

    <description>to highlight ion correlations considering PC-groups</description>

    <requirements>
        <requirement type="package" version="1.1_5">r-batch</requirement>
        <requirement type="package" version="0.8.8">r-reshape</requirement>
        <requirement type="package" version="7.3_53">r-mass</requirement>
    </requirements>

    <!--
        Every mode passes the same ten argument names; arguments that do not apply to
        the selected mode are passed as an empty string (or 0 for the numeric flags).
        All Cheetah substitutions are single-quoted so that each value reaches the
        script as exactly one shell word.
    -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/correlation_analysis.r'

        ##Sort the variable metadata only.##
        #if $cond_input_type.select_input_type == "select_input_from_w4m" and $cond_input_type.cond_function.select_funtion == "sort_only" :
            sorting 1
            variable_metadata '$cond_input_type.variableMetadata'
            data_matrix '$cond_input_type.dataMatrix'
            sample_metadata '$cond_input_type.sampleMetadata'
            corrdel 0
            param_correlation ""
            param_cytoscape ""
            matrix_corr 0
            user_matrix_corr ""
            corr_method ""
        #end if
        ##Sort the variable metadata and run the intra-pcgroup correlation analysis.##
        #if $cond_input_type.select_input_type == "select_input_from_w4m" and $cond_input_type.cond_function.select_funtion == "sort_and_corr" :
            sorting 1
            variable_metadata '$cond_input_type.variableMetadata'
            data_matrix '$cond_input_type.dataMatrix'
            sample_metadata '$cond_input_type.sampleMetadata'
            corrdel 1
            param_correlation '$cond_input_type.cond_function.param_correlation'
            param_cytoscape '$cond_input_type.cond_function.param_cytoscape'
            matrix_corr 0
            user_matrix_corr ""
            corr_method '$cond_input_type.cond_function.corr_method'
        #end if
        ##Create correlation matrix from a user table file.##
        #if $cond_input_type.select_input_type == "select_input_other" :
            sorting 0
            variable_metadata ""
            data_matrix ""
            sample_metadata ""
            corrdel 0
            param_correlation ""
            param_cytoscape '$cond_input_type.param_cytoscape'
            matrix_corr 1
            user_matrix_corr '$cond_input_type.user_matrix_corr'
            corr_method '$cond_input_type.corr_method'
        #end if
    ]]></command>

    <inputs>
        <!-- First-level choice: the three W4M tables, or a single user-provided table. -->
        <conditional name="cond_input_type">
            <param name="select_input_type" type="select" label="Choice of your input files">
                <option value="select_input_from_w4m" selected="true">Files from the metabolomic workflow</option>
                <option value="select_input_other">Your table file</option>
            </param>

            <when value="select_input_from_w4m">
                <param name="dataMatrix" type="data" format="tabular"
                       label="Data matrix"
                       help="dataMatrix file from the CAMERA.annotate step for example" />
                <param name="sampleMetadata" type="data" format="tabular"
                       label="Sample metadata"
                       help="sampleMetadata file from the xcms.xcmsSet step for example" />
                <param name="variableMetadata" type="data" format="tabular"
                       label="Variable metadata"
                       help="variableMetadata file from the CAMERA.annotate step for example" />

                <!-- Second-level choice: sorting alone, or sorting followed by the correlation analysis. -->
                <conditional name="cond_function">
                    <param name="select_funtion" type="select" label="Function to be used">
                        <option value="sort_only" selected="true">Sorting your table</option>
                        <option value="sort_and_corr">Sorting your table and doing correlation analysis</option>
                    </param>
                    <when value="sort_only" />
                    <when value="sort_and_corr">
                        <!-- Hidden flag; the filters of the correlation outputs below test its value. -->
                        <param name="corrdel" type="hidden" value="1" />
                        <param name="param_correlation" type="float" value="0.60"
                               label="Correlation threshold for pcgroup"
                               help="Threshold value that determines whether two metabolites of the same pcgroup are considered correlated (set it to 1 to keep all ions)" />
                        <param name="corr_method" type="select" label="Choice of the correlation method">
                            <option value="pearson" selected="true">pearson</option>
                            <option value="kendall">kendall</option>
                            <option value="spearman">spearman</option>
                        </param>
                        <param name="param_cytoscape" type="float" value="0.75"
                               label="Cytoscape correlation threshold"
                               help="Threshold value for selecting edges (i.e. correlations) that will be exported to the Cytoscape sif format file" />
                    </when>
                </conditional>
            </when>

            <when value="select_input_other">
                <param name="user_matrix_corr" type="data" format="tabular"
                       label="Your table file (tabular format)"
                       help="Your metabolites (variables) intensity table file (tabular format)" />
                <param name="corr_method" type="select" label="Choice of the correlation method">
                    <option value="pearson" selected="true">pearson</option>
                    <option value="kendall">kendall</option>
                    <option value="spearman">spearman</option>
                </param>
                <param name="param_cytoscape" type="float" value="0.75"
                       label="Cytoscape correlation threshold"
                       help="Threshold value for selecting edges (i.e. correlations) that will be exported to the Cytoscape sif format file" />
            </when>
        </conditional>
    </inputs>

    <!--
        Outputs are collected from fixed file names in the job working directory.
        "siff_table" and "siff_table_user_user" both collect siff_table.tsv; their
        filters are mutually exclusive, so at most one of them exists for a given job.
    -->
    <outputs>
        <data name="sorted_table" format="tabular" from_work_dir="sorted_table.tsv" label="sorted_variableMetadata.tsv">
            <filter>(cond_input_type['select_input_type'] == 'select_input_from_w4m')</filter>
        </data>
        <data name="correlation_matrix_selected" format="tabular" from_work_dir="correlation_matrix_selected.tsv" label="correlation_matrix_selected.tsv">
            <filter>(cond_input_type['select_input_type'] == 'select_input_from_w4m' and cond_input_type['cond_function']['select_funtion']== 'sort_and_corr' and cond_input_type['cond_function']['corrdel']== '1' )</filter>
        </data>
        <data name="siff_table" format="tabular" from_work_dir="siff_table.tsv" label="sif_table.tsv">
            <filter>(cond_input_type['select_input_type'] == 'select_input_from_w4m' and cond_input_type['cond_function']['select_funtion']== 'sort_and_corr' and cond_input_type['cond_function']['corrdel']== '1' )</filter>
        </data>
        <data name="correlation_matrix_user" format="tabular" from_work_dir="correlation_matrix.tsv" label="correlation_matrix.tsv">
            <filter>(cond_input_type['select_input_type'] == 'select_input_other')</filter>
        </data>
        <data name="siff_table_user_user" format="tabular" from_work_dir="siff_table.tsv" label="sif_table.tsv">
            <filter>(cond_input_type['select_input_type'] == 'select_input_other')</filter>
        </data>
    </outputs>

    <tests>
        <!-- W4M inputs with "sort_and_corr": the sorted table, the selected correlation matrix and the sif table are expected. -->
        <test expect_num_outputs="3">
            <conditional name="cond_input_type">
                <param name="select_input_type" value="select_input_from_w4m" />
                <param name="dataMatrix" value="in_DM1.tabular" ftype="tabular" />
                <param name="sampleMetadata" value="in_SM1.tabular" ftype="tabular" />
                <param name="variableMetadata" value="in_VM1.tabular" ftype="tabular" />
                <conditional name="cond_function">
                    <param name="select_funtion" value="sort_and_corr" />
                </conditional>
            </conditional>
            <output name="sorted_table" file="out_VM1.tabular" />
            <output name="correlation_matrix_selected" file="out_corr1.tabular" />
            <output name="siff_table" file="out_sif1.tabular" />
        </test>
    </tests>

    <help><![CDATA[

.. class:: infomark

**Authors** Antoine Gravot (Protocol design) and Misharl Monsoor (for initial galaxy wrapper and R script).

**Additional W4M contributors** ABiMS TEAM (SU/CNRS - Station biologique de Roscoff) and PFEM (INRAE - MetaboHUB)

---------------------------------------------------

================================
Metabolites correlation analysis
================================

-----------
Description
-----------

This tool takes as inputs either tabular table files from the metabolomic workflow (variableMetadata, dataMatrix and sampleMetadata) or a table file of your own
and can execute three different functions ("sorting", "corrdel" and "corr_matrix").

**The "sorting" function:** *used for metabolomic workflow*

    | 1) First of all, it sorts the data by pcgroup.
    | 2) It computes the mean of all the signal values of the metabolites by sample, and puts the results in a new column "signal_moy".
    | 3) It finally creates a tabular output "sorted_variableMetadata.tsv".

**The "corrdel" function:** *used for metabolomic workflow*

    | **For each pcgroup** of the previous sorted tabular file "sorted_table.tsv", it does the following things:
    |    - it computes a correlation matrix
    |    - it determines the metabolites which are not correlated to others from the same pcgroup based on the threshold value filled in the "Correlation threshold for pcgroup" parameter
    |    - the metabolites are sorted by the mean signal intensity (from the highest to the lowest), and each metabolite is tested against the previous ones in the list; if the tested metabolite is correlated to at least one previous one, it is tagged as DEL (for "deleted", written in a column called "suppress")
    |
    | It creates two additional tabular files:
    |    - "correlation_matrix_selected.tsv" (correlation matrix of selected metabolites only)
    |    - "sif_table.tsv" (for visualization in Cytoscape, based on selected metabolites and "Cytoscape correlation threshold" filled value)


**The "corr_matrix" function:** *used for user table file*

    | It computes a correlation matrix named "correlation_matrix.tsv" and creates a sif file named "sif_table.tsv" (for visualization in Cytoscape).



-----------------
Workflow position
-----------------


**Examples of upstream tools**

+----------------------------+----------------------+---------+-------------------+
| Name                       | Output file          | Format  | Parameter         |
+============================+======================+=========+===================+
| xcms findChromPeaks Merger | sampleMetadata.tsv   | Tabular | Sample metadata   |
+----------------------------+----------------------+---------+-------------------+
| xcms fillChromPeaks        | dataMatrix.tsv       | Tabular | Data matrix       |
+----------------------------+----------------------+---------+-------------------+
| CAMERA.annotate            | variableMetadata.tsv | Tabular | Variable metadata |
+----------------------------+----------------------+---------+-------------------+



**Examples of downstream tools**

+-------------------------+----------------------------------+---------+
| Name                    | Output file                      | Format  |
+=========================+==================================+=========+
| Hierarchical Clustering | selected_metabolites_transpo.tsv | Tabular |
+-------------------------+----------------------------------+---------+
| ANOVA                   | selected_metabolites_transpo.tsv | Tabular |
+-------------------------+----------------------------------+---------+



**General schema of the metabolomic workflow**

.. image:: MetaboAnalysisCorrelation_workflow.png

-----------
Input files
-----------

+-------------------+---------+
| Parameter: label  | Format  |
+===================+=========+
| Data matrix       | Tabular |
+-------------------+---------+
| Sample metadata   | Tabular |
+-------------------+---------+
| Variable metadata | Tabular |
+-------------------+---------+
| User table file   | Tabular |
+-------------------+---------+


----------
Parameters
----------

**Choice of your input files**

    | **variableMetadata**
    |
    | For example, the "variableMetadata.tsv" tabular file generated by the CAMERA.annotate step of the workflow.
    | This table must contain in particular two columns named "**pcgroup**" and "**rt**" (it is case-sensitive).
    |
    | **dataMatrix**
    |
    | For example, the "dataMatrix.tsv" tabular file generated by the CAMERA.annotate step of the workflow.
    |
    | **sampleMetadata**
    |
    | For example, the tabular file with the samples metadata generated by the xcmsSet step: one sample per line and at least two columns: ids and one variable.
    |
    | **user table**
    |
    | Tabular file containing intensities where your variables (metabolites) are in columns (for example a transposition of your dataMatrix file)

**Correlation threshold for pcgroup** *(metabolomic workflow only)*

The threshold value that will determine if two metabolites are correlated within the same pcgroup after the creation of the global correlation matrix.
If you do not want to use the intra-pcgroup filter (see "corrdel" function in the description section), put this threshold to 1 and all ions will be kept.

**Choice of the correlation method**

Choose the correlation method (pearson, kendall or spearman).

**Cytoscape correlation threshold**

Choose a threshold value for selecting edges (i.e. correlations between metabolites) that will be exported to the Cytoscape sif format file.

------------
Output files
------------



**sorted_variableMetadata.tsv** *(metabolomic workflow only)*

    | A tabular file which:
    |    1) contains the original variable metadata columns
    |    2) is sorted by the pcgroup column
    |    3) contains a new column "signal_moy" (mean of all the signal values of the metabolites by sample)
    |    4) (depending on parameters) contains a "suppress" column

**correlation_matrix_selected.tsv** *(metabolomic workflow only)*

    | A correlation matrix containing only the metabolites selected in each pcgroup (metabolites tagged as "DEL" in "suppress" column are removed),
    | completed with two columns "rtmed" and "signal_moy".

**correlation_matrix.tsv** *(user table file only)*

    | The correlation matrix computed from your table file.

**sif_table.tsv**

    | A tabular file (three columns: Metabolite1, Correlation coefficient, Metabolite 2) that can be used in Cytoscape.

------

.. class:: infomark

The output **selected_metabolites_dataMatrix.tsv** is a tabular file. You can continue your analysis using it for example in the following statistical tools:

    | Hierarchical Clustering
    | ANOVA


---------------------------------------------------

Changelog/News
--------------


**Version 1.0.1+galaxy0 - 10/12/2020**

- Update of some of the outputs' formats to match standard W4M table format
- Standard output (stdout) log improvement
- Change of testing data for faster job running for tests
- Fix: generation of the outputs for "Your table file" option

**Version 1.0.1 - 20/09/2016**

- TEST: refactoring to pass functional test using conda dependencies
- Help improvement


**Version 20141118 - 18/11/2014**

    ]]></help>

    <citations>
        <citation type="doi">10.1093/bioinformatics/btu813</citation>
    </citations>

</tool>