diff abims_correlation_analysis.xml @ 0:58997c28b268 draft default tip

"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics/blob/master/tools/correlation_analysis/ commit 35a01e4ef59a91f43d0b1de1d08db29dcc7aae1e"
author workflow4metabolomics
date Tue, 19 Jan 2021 16:41:47 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abims_correlation_analysis.xml	Tue Jan 19 16:41:47 2021 +0000
@@ -0,0 +1,322 @@
+<tool id="correlation_analysis" name="Metabolites Correlation Analysis" version="1.0.1+galaxy0" >
+
+    <description>to highlight ion correlations considering PC-groups</description>
+
+    <requirements>
+        <requirement type="package" version="1.1_5">r-batch</requirement>
+        <requirement type="package" version="0.8.8">r-reshape</requirement>
+        <requirement type="package" version="7.3_53">r-mass</requirement>
+    </requirements>
+
+    <command detect_errors='exit_code'>
+        Rscript '$__tool_directory__/correlation_analysis.r'
+
+        #if $cond_input_type.select_input_type == "select_input_from_w4m" and $cond_input_type.cond_function.select_funtion == "sort_only" :
+            sorting 1 variable_metadata '$cond_input_type.variableMetadata'
+            data_matrix '$cond_input_type.dataMatrix'
+            sample_metadata '$cond_input_type.sampleMetadata'
+            corrdel 0
+            param_correlation ""
+            param_cytoscape ""
+            matrix_corr 0
+            user_matrix_corr ""
+            corr_method ""
+        #end if
+        #if $cond_input_type.select_input_type == "select_input_from_w4m" and $cond_input_type.cond_function.select_funtion == "sort_and_corr" :
+            sorting 1
+            variable_metadata '$cond_input_type.variableMetadata'
+            data_matrix '$cond_input_type.dataMatrix'
+            sample_metadata '$cond_input_type.sampleMetadata'
+            corrdel 1
+            param_correlation $cond_input_type.cond_function.param_correlation
+            param_cytoscape $cond_input_type.cond_function.param_cytoscape
+            matrix_corr 0
+            user_matrix_corr ""
+            corr_method $cond_input_type.cond_function.corr_method
+        #end if
+        ##Create correlation matrix from a user table file.##
+        #if $cond_input_type.select_input_type == "select_input_other" :
+            sorting 0
+            variable_metadata ""
+            data_matrix ""
+            sample_metadata ""
+            corrdel 0
+            param_correlation ""
+            param_cytoscape $cond_input_type.param_cytoscape
+            matrix_corr 1
+            user_matrix_corr '$cond_input_type.user_matrix_corr'
+            corr_method $cond_input_type.corr_method
+        #end if
+
+    </command>
+
+    <inputs>
+
+
+         <conditional name="cond_input_type" >
+            <param name="select_input_type" type="select" label="Choice of your input files" help="" >
+                <option value="select_input_from_w4m" selected="true">Files from the metabolomic workflow</option>
+                <option value="select_input_other" >Your table file</option>
+            </param>
+            <when value="select_input_from_w4m">
+                <param name="dataMatrix" type="data" label="Data matrix" format="tabular" help="dataMatrix file from the CAMERA.annotate step for example" />
+                <param name="sampleMetadata" type="data" label="Sample metadata" format="tabular" help="sampleMetadata file from the xcms.xcmsSet step for example" />
+                <param name="variableMetadata" type="data" label="Variable metadata" format="tabular" help="variableMetadata file from the CAMERA.annotate step for example" />
+                <conditional name="cond_function" >
+                    <param name="select_funtion" type="select" label="Function to be used" help="" >
+                        <option value="sort_only" selected="true">Sorting your table</option>
+                        <option value="sort_and_corr" >Sorting your table and doing correlation analysis</option>
+                    </param>
+                    <when value="sort_only" />
+                    <when value="sort_and_corr">
+                         <param name="corrdel" type="hidden" value="1"/>
+                         <param name="param_correlation" type="float" label="Correlation threshold for pcgroup" value="0.60" help="Threshold value for selecting edges (i.e. correlations) that will be exported to the Cytoscape sif format file" />
+                         <param name="corr_method" type="select" label="Choice of the correlation method" help="" >
+                            <option value="pearson" selected="true">pearson</option>
+                            <option value="kendall">kendall</option>
+                            <option value="spearman">spearman</option>
+                         </param>
+                         <param name="param_cytoscape" type="float" label="Cytoscape correlation threshold" value="0.75" help="Choose a threshold value for selecting metabolites that will be exported to a cytoscape sif format" />
+                    </when>
+                </conditional>
+            </when>
+
+            <when value="select_input_other">
+                <param name="user_matrix_corr" type="data" label="Your table file (tabular format)" format="tabular" help="Your metabolites (variables) intensity table file (tabular format)" />
+                <param name="corr_method" type="select" label="Choice of the correlation method" help="" >
+                    <option value="pearson" selected="true">pearson</option>
+                    <option value="kendall">kendall</option>
+                    <option value="spearman">spearman</option>
+                </param>
+                <param name="param_cytoscape" type="float" label="Cytoscape correlation threshold" value="0.75" help="Threshold value for selecting edges (i.e. correlations) that will be exported to the Cytoscape sif format file" />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="sorted_table" format="tabular" from_work_dir="sorted_table.tsv" label="sorted_variableMetadata.tsv">
+             <filter>(cond_input_type['select_input_type'] == 'select_input_from_w4m')</filter>
+        </data>
+        <data name="correlation_matrix_selected" format="tabular" from_work_dir="correlation_matrix_selected.tsv" label="correlation_matrix_selected.tsv">
+            <filter>(cond_input_type['select_input_type'] == 'select_input_from_w4m' and cond_input_type['cond_function']['select_funtion']== 'sort_and_corr' and cond_input_type['cond_function']['corrdel']== '1' )</filter>
+        </data>
+        <data name="siff_table" format="tabular" from_work_dir="siff_table.tsv" label="sif_table.tsv">
+            <filter>(cond_input_type['select_input_type'] == 'select_input_from_w4m' and cond_input_type['cond_function']['select_funtion']== 'sort_and_corr' and cond_input_type['cond_function']['corrdel']== '1' )</filter>
+        </data>
+        <data name="correlation_matrix_user" format="tabular" from_work_dir="correlation_matrix.tsv" label="correlation_matrix.tsv">
+            <filter>(cond_input_type['select_input_type'] == 'select_input_other')</filter>
+        </data>
+        <data name="siff_table_user_user" format="tabular" from_work_dir="siff_table.tsv" label="sif_table.tsv">
+            <filter>(cond_input_type['select_input_type'] == 'select_input_other')</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="3">
+            <conditional name="cond_input_type" >
+                <param name="select_input_type" value="select_input_from_w4m" />
+                <param name="dataMatrix" value="in_DM1.tabular" ftype="tabular" />
+                <param name="sampleMetadata" value="in_SM1.tabular" ftype="tabular" />
+                <param name="variableMetadata" value="in_VM1.tabular" ftype="tabular" />
+                <conditional name="cond_function" >
+                    <param name="select_funtion" value="sort_and_corr" />
+                </conditional>
+            </conditional>
+            <output name="sorted_table" file="out_VM1.tabular"/>
+            <output name="correlation_matrix_selected" file="out_corr1.tabular"/>
+            <output name="siff_table" file="out_sif1.tabular"/>
+        </test>
+    </tests>
+
+    <help>
+
+.. class:: infomark
+
+**Authors** Antoine Gravot (Protocole conception) and Misharl Monsoor (for initial galaxy wrapper and R script).
+
+**Additional W4M contributors** ABiMS TEAM (SU/CNRS - Station biologique de Roscoff) and PFEM (INRAE - MetaboHUB)
+
+---------------------------------------------------
+
+================================
+Metabolites correlation analysis
+================================
+
+-----------
+Description
+-----------
+
+This tool takes as inputs either tabular table files from the metabolomic workflow (variableMetadata, dataMatrix and sampleMetadata) or a table file of your own
+and can execute three different functions ("sorting", "corrdel" and "corr_matrix").
+
+**The "sorting" function:** *used for metabolomic workflow*
+
+ | 1) First of all, it sorts the data by pcgroup.
+ | 2) It computes the mean operation of all the signal values of the metabolites by sample, and put the results in a new column "signal_moy".
+ | 3) It finally creates a tabular output "sorted_variableMetadata.tsv".
+
+**The "corrdel" function:** *used for metabolomic workflow*
+
+ | **For each pcgroup** of the previous sorted tabular file "sorted_table.tsv", it does the following things:
+ | - it computes a correlation matrix
+ | - it determines the metabolites which are not correlated to others from the same pcgroup based on the threshold value filled in the "Correlation threshold for pcgroup" parameter
+ | - the metabolites are sorted by the mean signal intensity (form the highest to the lowest), and each metabolite is tested to the previous ones in the list ; if the tested metabolite is at least correlated to one previous one, it is tagged as DEL (for "deleted", written in a column called "suppress")
+ |
+ | It creates two additional tabular files:
+ | - "correlation_matrix_selected.tsv" (correlation matrix of selected metabolites only)
+ | - "sif_table.tsv" (for visualization in CytoScape, based on selected metabolites and "Cytoscape correlation threshold" filled value)
+
+
+**The "corr_matrix" function:** *used for user table file*
+
+ | It computes a correlation matrix named "correlation_matrix.tsv" and creates a sif file named "sif_table.tsv" (for visualization in CytoScape).
+
+
+
+-----------------
+Workflow position
+-----------------
+
+
+**Examples of upstream tools**
+
++---------------------------+--------------------------+--------+------------------------+
+| Name                      | Output file              | Format | parameter              |
++===========================+==========================+========+========================+
+|xcms findChromPeaks Merger |sampleMetada.tsv          | Tabular| Sample metadata        |
++---------------------------+--------------------------+--------+------------------------+
+|xcms fillChromPeaks        |dataMatrix.tsv            | Tabular| Data matrix            |
++---------------------------+--------------------------+--------+------------------------+
+|CAMERA.annotate            |variableMetadata.tsv      | Tabular| Variable metadata      |
++---------------------------+--------------------------+--------+------------------------+
+
+
+
+**Examples of downstream tools**
+
++---------------------------+--------------------------------------+--------+
+| Name                      | Output file                          | Format |
++===========================+======================================+========+
+|Hierarchical Clustering    |selected_metabolites_transpo.tsv      | Tabular|
++---------------------------+--------------------------------------+--------+
+|ANOVA                      |selected_metabolites_transpo.tsv      | Tabular|
++---------------------------+--------------------------------------+--------+
+
+
+
+**General schema of the metabolomic workflow**
+
+.. image:: MetaboAnalysisCorrelation_workflow.png
+
+-----------
+Input files
+-----------
+
++--------------------------------+------------+
+| Parameter: label               | Format     |
++================================+============+
+| Data matrix                    | Tabular    |
++--------------------------------+------------+
+| Sample metadata                | Tabular    |
++--------------------------------+------------+
+| Variable metadata              | Tabular    |
++--------------------------------+------------+
+| User table file                | Tabular    |
++--------------------------------+------------+
+
+
+----------
+Parameters
+----------
+
+**Choice of your input files**
+
+ | **variableMetadata**
+ |
+ | For example, the "variableMetadata.tsv" tabular file generated by the CAMERA.annotate step of the workflow.
+ | This table must contain in particular two columns named "**pcgroup**" and "**rt**" (it is case-sensitive).
+ |
+ | **dataMatrix**
+ |
+ | For example, the "dataMatrix.tsv" tabular file generated by the CAMERA.annotate step of the workflow.
+ |
+ | **sampleMetadata**
+ |
+ | For example, the tabular file with the samples metadata generated by the xcmsSet step: one sample per line and at least two columns: ids and one variable.
+ |
+ | **user table**
+ |
+ | Tabular containing intensities where your variables (metabolites) are in columns (for example a transposition of your datamatrix file)
+
+**Correlation threshold for pcgroup** *(metabolomic workflow only)*
+
+The threshold value that will determine if two metabolites are correlated inside a same pcgroup after the creation of the global correlation matrix.
+If you do not want to use the intra-pcgroup filter (see "corrdel" function in the description section), put this threshold to 1 and all ions will be kept.
+
+**Choice of the correlation method**
+
+Choose the correlation method (pearson, kendall or spearman).
+
+**Cytoscape correlation threshold**
+
+Choose a threshold value for selecting edges (i.e. correlations between metabolites) that will be exported to the Cytoscape sif format file.
+
+------------
+Output files
+------------
+
+
+
+**sorted_variableMetadata.tsv** *(metabolomic workflow only)*
+
+ | A tabular file which:
+ | 1) contains the original variable metadata columns
+ | 2) is sorted by the pcgroup column
+ | 3) contains a new column "signal_moy" (mean of all the signal values of the metabolites by sample)
+ | 4) (depending of parameters) contains a "suppress" column
+
+**correlation_matrix_selected.tsv** *(metabolomic workflow only)*
+
+ | A correlation matrix containing only the metabolites selected in each pcgroup (metabolites tagged as "DEL" in "suppress" column are removed),
+ | completed with two columns "rtmed" and "signal_moy".
+
+**sif_table.tsv**
+
+ | A tabular file (three columns: Metabolite1, Correlation coefficient, Metabolite 2) that can be used in Cytoscape.
+
+------
+
+.. class:: infomark
+
+The output **selected_metabolites_dataMatrix.tsv** is a tabular file. You can continue your analysis using it for example in the following statistical tools:
+ | Hierarchical Clustering
+ | ANOVA
+
+
+---------------------------------------------------
+
+Changelog/News
+--------------
+
+
+**Version 1.0.1+galaxy0 - 10/12/2020**
+
+- Update of some of the outputs' formats to match standard W4M table format
+- Standard output (stdout) log improvement
+- Change of testing data for faster job running for tests
+- Fix: generation of the outputs for "Your table file" option
+
+**Version 1.0.1 - 20/09/2016**
+
+- TEST: refactoring to pass functional test using conda dependencies
+- Help improvement
+
+
+**Version 20141118 - 18/11/2014**
+
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btu813</citation>
+    </citations>
+
+</tool>