comparison abims_correlation_analysis.xml @ 0:58997c28b268 draft default tip

"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics/blob/master/tools/correlation_analysis/ commit 35a01e4ef59a91f43d0b1de1d08db29dcc7aae1e"
author workflow4metabolomics
date Tue, 19 Jan 2021 16:41:47 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:58997c28b268
1 <tool id="correlation_analysis" name="Metabolites Correlation Analysis" version="1.0.1+galaxy0" >
2
3 <description>to highlight ion correlations considering PC-groups</description>
4 <!-- R package dependencies of the tool, each pinned to an exact version. -->
5 <requirements>
6 <requirement type="package" version="1.1_5">r-batch</requirement>
7 <requirement type="package" version="0.8.8">r-reshape</requirement>
8 <requirement type="package" version="7.3_53">r-mass</requirement>
9 </requirements>
10
11 <command detect_errors="exit_code"><![CDATA[
12 Rscript '$__tool_directory__/correlation_analysis.r'
13 #if $cond_input_type.select_input_type == "select_input_from_w4m":
14 #if $cond_input_type.cond_function.select_funtion == "sort_only":
15 sorting 1 variable_metadata '$cond_input_type.variableMetadata'
16 data_matrix '$cond_input_type.dataMatrix'
17 sample_metadata '$cond_input_type.sampleMetadata'
18 corrdel 0
19 param_correlation ""
20 param_cytoscape ""
21 matrix_corr 0
22 user_matrix_corr ""
23 corr_method ""
24 ## W4M tables, "sort_and_corr": sorting and corrdel are both switched on.
25 #elif $cond_input_type.cond_function.select_funtion == "sort_and_corr":
26 sorting 1
27 variable_metadata '$cond_input_type.variableMetadata'
28 data_matrix '$cond_input_type.dataMatrix'
29 sample_metadata '$cond_input_type.sampleMetadata'
30 corrdel 1
31 param_correlation $cond_input_type.cond_function.param_correlation
32 param_cytoscape $cond_input_type.cond_function.param_cytoscape
33 matrix_corr 0
34 user_matrix_corr ""
35 corr_method $cond_input_type.cond_function.corr_method
36 #end if
37 ## User table: sorting and corrdel are switched off, matrix_corr is switched on.
38 #elif $cond_input_type.select_input_type == "select_input_other":
39 sorting 0
40 variable_metadata ""
41 data_matrix ""
42 sample_metadata ""
43 corrdel 0
44 param_correlation ""
45 param_cytoscape $cond_input_type.param_cytoscape
46 matrix_corr 1
47 user_matrix_corr '$cond_input_type.user_matrix_corr'
48 corr_method $cond_input_type.corr_method
49 #end if
50
51 ]]></command>
52
53 <inputs>
54 <!-- Outer conditional: the three W4M tables (dataMatrix, sampleMetadata, variableMetadata) or a single user table. -->
55 <!-- The parameter names below (including "select_funtion" and the hidden "corrdel") are referenced by the command template and the output filters; keep them unchanged. -->
56 <conditional name="cond_input_type" >
57 <param name="select_input_type" type="select" label="Choice of your input files" help="" >
58 <option value="select_input_from_w4m" selected="true">Files from the metabolomic workflow</option>
59 <option value="select_input_other" >Your table file</option>
60 </param>
61 <when value="select_input_from_w4m">
62 <param name="dataMatrix" type="data" label="Data matrix" format="tabular" help="dataMatrix file from the CAMERA.annotate step for example" />
63 <param name="sampleMetadata" type="data" label="Sample metadata" format="tabular" help="sampleMetadata file from the xcms.xcmsSet step for example" />
64 <param name="variableMetadata" type="data" label="Variable metadata" format="tabular" help="variableMetadata file from the CAMERA.annotate step for example" />
65 <conditional name="cond_function" >
66 <param name="select_funtion" type="select" label="Function to be used" help="" >
67 <option value="sort_only" selected="true">Sorting your table</option>
68 <option value="sort_and_corr" >Sorting your table and doing correlation analysis</option>
69 </param>
70 <when value="sort_only" />
71 <when value="sort_and_corr">
72 <param name="corrdel" type="hidden" value="1"/>
73 <param name="param_correlation" type="float" label="Correlation threshold for pcgroup" value="0.60" help="Threshold value used to decide whether two metabolites of the same pcgroup are correlated. Set it to 1 to skip the intra-pcgroup filter and keep all ions" />
74 <param name="corr_method" type="select" label="Choice of the correlation method" help="" >
75 <option value="pearson" selected="true">pearson</option>
76 <option value="kendall">kendall</option>
77 <option value="spearman">spearman</option>
78 </param>
79 <param name="param_cytoscape" type="float" label="Cytoscape correlation threshold" value="0.75" help="Threshold value for selecting edges (i.e. correlations) that will be exported to the Cytoscape sif format file" />
80 </when>
81 </conditional>
82 </when>
83 <!-- User table: the table file, the correlation method and the Cytoscape threshold are the only settings. -->
84 <when value="select_input_other">
85 <param name="user_matrix_corr" type="data" label="Your table file (tabular format)" format="tabular" help="Your metabolites (variables) intensity table file (tabular format)" />
86 <param name="corr_method" type="select" label="Choice of the correlation method" help="" >
87 <option value="pearson" selected="true">pearson</option>
88 <option value="kendall">kendall</option>
89 <option value="spearman">spearman</option>
90 </param>
91 <param name="param_cytoscape" type="float" label="Cytoscape correlation threshold" value="0.75" help="Threshold value for selecting edges (i.e. correlations) that will be exported to the Cytoscape sif format file" />
92 </when>
93 </conditional>
94 </inputs>
95 <!-- Which datasets are created depends on the input type and, for W4M inputs, on the chosen function. -->
96 <outputs>
97 <data name="sorted_table" label="sorted_variableMetadata.tsv" format="tabular" from_work_dir="sorted_table.tsv">
98 <filter>cond_input_type['select_input_type'] == 'select_input_from_w4m'</filter>
99 </data>
100 <data name="correlation_matrix_selected" label="correlation_matrix_selected.tsv" format="tabular" from_work_dir="correlation_matrix_selected.tsv">
101 <filter>cond_input_type['select_input_type'] == 'select_input_from_w4m' and cond_input_type['cond_function']['select_funtion'] == 'sort_and_corr' and cond_input_type['cond_function']['corrdel'] == '1'</filter>
102 </data>
103 <data name="siff_table" label="sif_table.tsv" format="tabular" from_work_dir="siff_table.tsv">
104 <filter>cond_input_type['select_input_type'] == 'select_input_from_w4m' and cond_input_type['cond_function']['select_funtion'] == 'sort_and_corr' and cond_input_type['cond_function']['corrdel'] == '1'</filter>
105 </data>
106 <data name="correlation_matrix_user" label="correlation_matrix.tsv" format="tabular" from_work_dir="correlation_matrix.tsv">
107 <filter>cond_input_type['select_input_type'] == 'select_input_other'</filter>
108 </data>
109 <data name="siff_table_user_user" label="sif_table.tsv" format="tabular" from_work_dir="siff_table.tsv">
110 <filter>cond_input_type['select_input_type'] == 'select_input_other'</filter>
111 </data>
112 </outputs>
113 <!-- Single functional test: W4M inputs with the "sort_and_corr" function, checking the three outputs declared for that case. -->
114 <tests>
115 <test expect_num_outputs="3">
116 <conditional name="cond_input_type" >
117 <param name="select_input_type" value="select_input_from_w4m" />
118 <param name="dataMatrix" value="in_DM1.tabular" ftype="tabular" />
119 <param name="sampleMetadata" value="in_SM1.tabular" ftype="tabular" />
120 <param name="variableMetadata" value="in_VM1.tabular" ftype="tabular" />
121 <conditional name="cond_function" >
122 <param name="select_funtion" value="sort_and_corr" />
123 </conditional>
124 </conditional>
125 <output name="sorted_table" file="out_VM1.tabular"/>
126 <output name="correlation_matrix_selected" file="out_corr1.tabular"/>
127 <output name="siff_table" file="out_sif1.tabular"/>
128 </test>
129 </tests>
130
131 <help>
132
133 .. class:: infomark
134
135 **Authors** Antoine Gravot (protocol design) and Misharl Monsoor (initial Galaxy wrapper and R script).
136
137 **Additional W4M contributors** ABiMS TEAM (SU/CNRS - Station biologique de Roscoff) and PFEM (INRAE - MetaboHUB)
138
139 ---------------------------------------------------
140
141 ================================
142 Metabolites correlation analysis
143 ================================
144
145 -----------
146 Description
147 -----------
148
149 This tool takes as inputs either tabular table files from the metabolomic workflow (variableMetadata, dataMatrix and sampleMetadata) or a table file of your own
150 and can execute three different functions ("sorting", "corrdel" and "corr_matrix").
151
152 **The "sorting" function:** *used for metabolomic workflow*
153
154 | 1) First of all, it sorts the data by pcgroup.
155 | 2) It computes the mean of all the signal values of the metabolites by sample, and puts the results in a new column "signal_moy".
156 | 3) It finally creates a tabular output "sorted_variableMetadata.tsv".
157
158 **The "corrdel" function:** *used for metabolomic workflow*
159
160 | **For each pcgroup** of the previous sorted tabular file "sorted_table.tsv", it does the following things:
161 | - it computes a correlation matrix
162 | - it determines the metabolites which are not correlated to others from the same pcgroup based on the threshold value filled in the "Correlation threshold for pcgroup" parameter
163 | - the metabolites are sorted by the mean signal intensity (from the highest to the lowest), and each metabolite is tested against the previous ones in the list; if the tested metabolite is correlated to at least one previous one, it is tagged as DEL (for "deleted", written in a column called "suppress")
164 |
165 | It creates two additional tabular files:
166 | - "correlation_matrix_selected.tsv" (correlation matrix of selected metabolites only)
167 | - "sif_table.tsv" (for visualization in CytoScape, based on selected metabolites and "Cytoscape correlation threshold" filled value)
168
169
170 **The "corr_matrix" function:** *used for user table file*
171
172 | It computes a correlation matrix named "correlation_matrix.tsv" and creates a sif file named "sif_table.tsv" (for visualization in CytoScape).
173
174
175
176 -----------------
177 Workflow position
178 -----------------
179
180
181 **Examples of upstream tools**
182
183 +---------------------------+--------------------------+--------+------------------------+
184 | Name | Output file | Format | parameter |
185 +===========================+==========================+========+========================+
186 |xcms findChromPeaks Merger |sampleMetadata.tsv | Tabular| Sample metadata |
187 +---------------------------+--------------------------+--------+------------------------+
188 |xcms fillChromPeaks |dataMatrix.tsv | Tabular| Data matrix |
189 +---------------------------+--------------------------+--------+------------------------+
190 |CAMERA.annotate |variableMetadata.tsv | Tabular| Variable metadata |
191 +---------------------------+--------------------------+--------+------------------------+
192
193
194
195 **Examples of downstream tools**
196
197 +---------------------------+--------------------------------------+--------+
198 | Name | Output file | Format |
199 +===========================+======================================+========+
200 |Hierarchical Clustering |selected_metabolites_transpo.tsv | Tabular|
201 +---------------------------+--------------------------------------+--------+
202 |ANOVA |selected_metabolites_transpo.tsv | Tabular|
203 +---------------------------+--------------------------------------+--------+
204
205
206
207 **General schema of the metabolomic workflow**
208
209 .. image:: MetaboAnalysisCorrelation_workflow.png
210
211 -----------
212 Input files
213 -----------
214
215 +--------------------------------+------------+
216 | Parameter: label | Format |
217 +================================+============+
218 | Data matrix | Tabular |
219 +--------------------------------+------------+
220 | Sample metadata | Tabular |
221 +--------------------------------+------------+
222 | Variable metadata | Tabular |
223 +--------------------------------+------------+
224 | User table file | Tabular |
225 +--------------------------------+------------+
226
227
228 ----------
229 Parameters
230 ----------
231
232 **Choice of your input files**
233
234 | **variableMetadata**
235 |
236 | For example, the "variableMetadata.tsv" tabular file generated by the CAMERA.annotate step of the workflow.
237 | This table must contain in particular two columns named "**pcgroup**" and "**rt**" (it is case-sensitive).
238 |
239 | **dataMatrix**
240 |
241 | For example, the "dataMatrix.tsv" tabular file generated by the CAMERA.annotate step of the workflow.
242 |
243 | **sampleMetadata**
244 |
245 | For example, the tabular file with the samples metadata generated by the xcmsSet step: one sample per line and at least two columns: ids and one variable.
246 |
247 | **user table**
248 |
249 | Tabular containing intensities where your variables (metabolites) are in columns (for example a transposition of your datamatrix file)
250
251 **Correlation threshold for pcgroup** *(metabolomic workflow only)*
252
253 The threshold value that will determine if two metabolites are correlated within the same pcgroup after the creation of the global correlation matrix.
254 If you do not want to use the intra-pcgroup filter (see "corrdel" function in the description section), put this threshold to 1 and all ions will be kept.
255
256 **Choice of the correlation method**
257
258 Choose the correlation method (pearson, kendall or spearman).
259
260 **Cytoscape correlation threshold**
261
262 Choose a threshold value for selecting edges (i.e. correlations between metabolites) that will be exported to the Cytoscape sif format file.
263
264 ------------
265 Output files
266 ------------
267
268
269
270 **sorted_variableMetadata.tsv** *(metabolomic workflow only)*
271
272 | A tabular file which:
273 | 1) contains the original variable metadata columns
274 | 2) is sorted by the pcgroup column
275 | 3) contains a new column "signal_moy" (mean of all the signal values of the metabolites by sample)
276 | 4) (depending on parameters) contains a "suppress" column
277
278 **correlation_matrix_selected.tsv** *(metabolomic workflow only)*
279
280 | A correlation matrix containing only the metabolites selected in each pcgroup (metabolites tagged as "DEL" in "suppress" column are removed),
281 | completed with two columns "rtmed" and "signal_moy".
282
283 **sif_table.tsv**
284
285 | A tabular file (three columns: Metabolite1, Correlation coefficient, Metabolite 2) that can be used in Cytoscape.
286
287 ------
288
289 .. class:: infomark
290
291 The output **selected_metabolites_dataMatrix.tsv** is a tabular file. You can continue your analysis using it for example in the following statistical tools:
292 | Hierarchical Clustering
293 | ANOVA
294
295
296 ---------------------------------------------------
297
298 Changelog/News
299 --------------
300
301
302 **Version 1.0.1+galaxy0 - 10/12/2020**
303
304 - Update of some of the outputs' formats to match standard W4M table format
305 - Standard output (stdout) log improvement
306 - Change of testing data for faster job running for tests
307 - Fix: generation of the outputs for "Your table file" option
308
309 **Version 1.0.1 - 20/09/2016**
310
311 - TEST: refactoring to pass functional test using conda dependencies
312 - Help improvement
313
314
315 **Version 20141118 - 18/11/2014**
316
317 </help>
318 <citations> <!-- Reference to cite for this tool, given as a DOI. -->
319 <citation type="doi">10.1093/bioinformatics/btu813</citation>
320 </citations>
321
322 </tool>