Mercurial > repos > workflow4metabolomics > correlation_analysis
comparison abims_correlation_analysis.xml @ 0:58997c28b268 draft default tip
"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics/blob/master/tools/correlation_analysis/ commit 35a01e4ef59a91f43d0b1de1d08db29dcc7aae1e"
| author | workflow4metabolomics |
|---|---|
| date | Tue, 19 Jan 2021 16:41:47 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:58997c28b268 |
|---|---|
| 1 <tool id="correlation_analysis" name="Metabolites Correlation Analysis" version="1.0.1+galaxy0" > | |
| 2 | |
| 3 <description>to highlight ion correlations considering PC-groups</description> | |
| 4 | |
| 5 <requirements> <!-- Packages (r-batch, r-reshape, r-mass) required by the tool, each pinned to an exact version. --> | |
| 6 <requirement type="package" version="1.1_5">r-batch</requirement> | |
| 7 <requirement type="package" version="0.8.8">r-reshape</requirement> | |
| 8 <requirement type="package" version="7.3_53">r-mass</requirement> | |
| 9 </requirements> | |
| 10 | |
| 11 <command detect_errors='exit_code'> | |
| 12 Rscript '$__tool_directory__/correlation_analysis.r' | |
| 13 ## Arguments are passed to the script as name/value pairs. Exactly one of the three branches below applies; every substituted parameter is single-quoted. | |
| 14 #if $cond_input_type.select_input_type == "select_input_from_w4m" and $cond_input_type.cond_function.select_funtion == "sort_only" : | |
| 15 sorting 1 variable_metadata '$cond_input_type.variableMetadata' | |
| 16 data_matrix '$cond_input_type.dataMatrix' | |
| 17 sample_metadata '$cond_input_type.sampleMetadata' | |
| 18 corrdel 0 | |
| 19 param_correlation "" | |
| 20 param_cytoscape "" | |
| 21 matrix_corr 0 | |
| 22 user_matrix_corr "" | |
| 23 corr_method "" | |
| 24 #end if | |
| 25 #if $cond_input_type.select_input_type == "select_input_from_w4m" and $cond_input_type.cond_function.select_funtion == "sort_and_corr" : | |
| 26 sorting 1 | |
| 27 variable_metadata '$cond_input_type.variableMetadata' | |
| 28 data_matrix '$cond_input_type.dataMatrix' | |
| 29 sample_metadata '$cond_input_type.sampleMetadata' | |
| 30 corrdel 1 | |
| 31 param_correlation '$cond_input_type.cond_function.param_correlation' | |
| 32 param_cytoscape '$cond_input_type.cond_function.param_cytoscape' | |
| 33 matrix_corr 0 | |
| 34 user_matrix_corr "" | |
| 35 corr_method '$cond_input_type.cond_function.corr_method' | |
| 36 #end if | |
| 37 ##Create correlation matrix from a user table file.## | |
| 38 #if $cond_input_type.select_input_type == "select_input_other" : | |
| 39 sorting 0 | |
| 40 variable_metadata "" | |
| 41 data_matrix "" | |
| 42 sample_metadata "" | |
| 43 corrdel 0 | |
| 44 param_correlation "" | |
| 45 param_cytoscape '$cond_input_type.param_cytoscape' | |
| 46 matrix_corr 1 | |
| 47 user_matrix_corr '$cond_input_type.user_matrix_corr' | |
| 48 corr_method '$cond_input_type.corr_method' | |
| 49 #end if | |
| 50 | |
| 51 </command> | |
| 52 | |
| 53 <inputs> | |
| 54 <!-- Top-level choice: the three W4M tables (dataMatrix, sampleMetadata, variableMetadata) or a single user table. --> | |
| 55 <!-- Parameter names, including the spelling of select_funtion, are referenced by the command template, the output filters and the tests. --> | |
| 56 <conditional name="cond_input_type" > | |
| 57 <param name="select_input_type" type="select" label="Choice of your input files" help="" > | |
| 58 <option value="select_input_from_w4m" selected="true">Files from the metabolomic workflow</option> | |
| 59 <option value="select_input_other" >Your table file</option> | |
| 60 </param> | |
| 61 <when value="select_input_from_w4m"> | |
| 62 <param name="dataMatrix" type="data" label="Data matrix" format="tabular" help="dataMatrix file from the CAMERA.annotate step for example" /> | |
| 63 <param name="sampleMetadata" type="data" label="Sample metadata" format="tabular" help="sampleMetadata file from the xcms.xcmsSet step for example" /> | |
| 64 <param name="variableMetadata" type="data" label="Variable metadata" format="tabular" help="variableMetadata file from the CAMERA.annotate step for example" /> | |
| 65 <conditional name="cond_function" > | |
| 66 <param name="select_funtion" type="select" label="Function to be used" help="" > | |
| 67 <option value="sort_only" selected="true">Sorting your table</option> | |
| 68 <option value="sort_and_corr" >Sorting your table and doing correlation analysis</option> | |
| 69 </param> | |
| 70 <when value="sort_only" /> | |
| 71 <when value="sort_and_corr"> | |
| 72 <param name="corrdel" type="hidden" value="1"/> <!-- Hidden flag read by the output filters of the correlation outputs. --> | |
| 73 <param name="param_correlation" type="float" label="Correlation threshold for pcgroup" value="0.60" help="Threshold value that determines whether two metabolites of the same pcgroup are considered correlated (set it to 1 to keep all ions)" /> | |
| 74 <param name="corr_method" type="select" label="Choice of the correlation method" help="" > | |
| 75 <option value="pearson" selected="true">pearson</option> | |
| 76 <option value="kendall">kendall</option> | |
| 77 <option value="spearman">spearman</option> | |
| 78 </param> | |
| 79 <param name="param_cytoscape" type="float" label="Cytoscape correlation threshold" value="0.75" help="Threshold value for selecting edges (i.e. correlations) that will be exported to the Cytoscape sif format file" /> | |
| 80 </when> | |
| 81 </conditional> | |
| 82 </when> | |
| 83 <!-- User table branch: only a correlation method and a Cytoscape threshold are asked for. --> | |
| 84 <when value="select_input_other"> | |
| 85 <param name="user_matrix_corr" type="data" label="Your table file (tabular format)" format="tabular" help="Your metabolites (variables) intensity table file (tabular format)" /> | |
| 86 <param name="corr_method" type="select" label="Choice of the correlation method" help="" > | |
| 87 <option value="pearson" selected="true">pearson</option> | |
| 88 <option value="kendall">kendall</option> | |
| 89 <option value="spearman">spearman</option> | |
| 90 </param> | |
| 91 <param name="param_cytoscape" type="float" label="Cytoscape correlation threshold" value="0.75" help="Threshold value for selecting edges (i.e. correlations) that will be exported to the Cytoscape sif format file" /> | |
| 92 </when> | |
| 93 </conditional> | |
| 94 </inputs> | |
| 95 | |
| 96 <outputs> <!-- Each dataset is collected from the job working directory; its filter decides whether it is created. --> | |
| 97 <data name="sorted_table" label="sorted_variableMetadata.tsv" format="tabular" from_work_dir="sorted_table.tsv"> <!-- W4M inputs, both functions --> | |
| 98 <filter>cond_input_type['select_input_type'] == 'select_input_from_w4m'</filter> | |
| 99 </data> | |
| 100 <data name="correlation_matrix_selected" label="correlation_matrix_selected.tsv" format="tabular" from_work_dir="correlation_matrix_selected.tsv"> <!-- W4M inputs, sort_and_corr only --> | |
| 101 <filter>cond_input_type['select_input_type'] == 'select_input_from_w4m' and cond_input_type['cond_function']['select_funtion'] == 'sort_and_corr' and cond_input_type['cond_function']['corrdel'] == '1'</filter> | |
| 102 </data> | |
| 103 <data name="siff_table" label="sif_table.tsv" format="tabular" from_work_dir="siff_table.tsv"> <!-- W4M inputs, sort_and_corr only --> | |
| 104 <filter>cond_input_type['select_input_type'] == 'select_input_from_w4m' and cond_input_type['cond_function']['select_funtion'] == 'sort_and_corr' and cond_input_type['cond_function']['corrdel'] == '1'</filter> | |
| 105 </data> | |
| 106 <data name="correlation_matrix_user" label="correlation_matrix.tsv" format="tabular" from_work_dir="correlation_matrix.tsv"> <!-- user table input --> | |
| 107 <filter>cond_input_type['select_input_type'] == 'select_input_other'</filter> | |
| 108 </data> | |
| 109 <data name="siff_table_user_user" label="sif_table.tsv" format="tabular" from_work_dir="siff_table.tsv"> <!-- user table input --> | |
| 110 <filter>cond_input_type['select_input_type'] == 'select_input_other'</filter> | |
| 111 </data> | |
| 112 </outputs> | |
| 113 | |
| 114 <tests> | |
| 115 <test expect_num_outputs="3"> <!-- W4M inputs with sort_and_corr: checks the sorted table, the selected correlation matrix and the sif table. --> | |
| 116 <conditional name="cond_input_type" > | |
| 117 <param name="select_input_type" value="select_input_from_w4m" /> | |
| 118 <param name="dataMatrix" value="in_DM1.tabular" ftype="tabular" /> | |
| 119 <param name="sampleMetadata" value="in_SM1.tabular" ftype="tabular" /> | |
| 120 <param name="variableMetadata" value="in_VM1.tabular" ftype="tabular" /> | |
| 121 <conditional name="cond_function" > | |
| 122 <param name="select_funtion" value="sort_and_corr" /> | |
| 123 </conditional> | |
| 124 </conditional> | |
| 125 <output name="sorted_table" file="out_VM1.tabular"/> | |
| 126 <output name="correlation_matrix_selected" file="out_corr1.tabular"/> | |
| 127 <output name="siff_table" file="out_sif1.tabular"/> | |
| 128 </test> | |
| 129 </tests> | |
| 130 | |
| 131 <help> | |
| 132 | |
| 133 .. class:: infomark | |
| 134 | |
| 135 **Authors** Antoine Gravot (protocol design) and Misharl Monsoor (initial Galaxy wrapper and R script). | |
| 136 | |
| 137 **Additional W4M contributors** ABiMS TEAM (SU/CNRS - Station biologique de Roscoff) and PFEM (INRAE - MetaboHUB) | |
| 138 | |
| 139 --------------------------------------------------- | |
| 140 | |
| 141 ================================ | |
| 142 Metabolites correlation analysis | |
| 143 ================================ | |
| 144 | |
| 145 ----------- | |
| 146 Description | |
| 147 ----------- | |
| 148 | |
| 149 This tool takes as inputs either tabular table files from the metabolomic workflow (variableMetadata, dataMatrix and sampleMetadata) or a table file of your own | |
| 150 and can execute three different functions ("sorting", "corrdel" and "corr_matrix"). | |
| 151 | |
| 152 **The "sorting" function:** *used for metabolomic workflow* | |
| 153 | |
| 154 | 1) First of all, it sorts the data by pcgroup. | |
| 155 | 2) It computes the mean of all the signal values of the metabolites by sample, and puts the results in a new column "signal_moy". | |
| 156 | 3) It finally creates a tabular output "sorted_variableMetadata.tsv". | |
| 157 | |
| 158 **The "corrdel" function:** *used for metabolomic workflow* | |
| 159 | |
| 160 | **For each pcgroup** of the previous sorted tabular file "sorted_table.tsv", it does the following things: | |
| 161 | - it computes a correlation matrix | |
| 162 | - it determines the metabolites which are not correlated to others from the same pcgroup based on the threshold value filled in the "Correlation threshold for pcgroup" parameter | |
| 163 | - the metabolites are sorted by the mean signal intensity (from the highest to the lowest), and each metabolite is tested against the previous ones in the list; if the tested metabolite is correlated to at least one previous one, it is tagged as DEL (for "deleted", written in a column called "suppress") | |
| 164 | | |
| 165 | It creates two additional tabular files: | |
| 166 | - "correlation_matrix_selected.tsv" (correlation matrix of selected metabolites only) | |
| 167 | - "sif_table.tsv" (for visualization in Cytoscape, based on the selected metabolites and the "Cytoscape correlation threshold" value) | |
| 168 | |
| 169 | |
| 170 **The "corr_matrix" function:** *used for user table file* | |
| 171 | |
| 172 | It computes a correlation matrix named "correlation_matrix.tsv" and creates a sif file named "sif_table.tsv" (for visualization in Cytoscape). | |
| 173 | |
| 174 | |
| 175 | |
| 176 ----------------- | |
| 177 Workflow position | |
| 178 ----------------- | |
| 179 | |
| 180 | |
| 181 **Examples of upstream tools** | |
| 182 | |
| 183 +---------------------------+--------------------------+--------+------------------------+ | |
| 184 | Name | Output file | Format | parameter | | |
| 185 +===========================+==========================+========+========================+ | |
| 186 |xcms findChromPeaks Merger |sampleMetadata.tsv | Tabular| Sample metadata | | |
| 187 +---------------------------+--------------------------+--------+------------------------+ | |
| 188 |xcms fillChromPeaks |dataMatrix.tsv | Tabular| Data matrix | | |
| 189 +---------------------------+--------------------------+--------+------------------------+ | |
| 190 |CAMERA.annotate |variableMetadata.tsv | Tabular| Variable metadata | | |
| 191 +---------------------------+--------------------------+--------+------------------------+ | |
| 192 | |
| 193 | |
| 194 | |
| 195 **Examples of downstream tools** | |
| 196 | |
| 197 +---------------------------+--------------------------------------+--------+ | |
| 198 | Name | Output file | Format | | |
| 199 +===========================+======================================+========+ | |
| 200 |Hierarchical Clustering |selected_metabolites_transpo.tsv | Tabular| | |
| 201 +---------------------------+--------------------------------------+--------+ | |
| 202 |ANOVA |selected_metabolites_transpo.tsv | Tabular| | |
| 203 +---------------------------+--------------------------------------+--------+ | |
| 204 | |
| 205 | |
| 206 | |
| 207 **General schema of the metabolomic workflow** | |
| 208 | |
| 209 .. image:: MetaboAnalysisCorrelation_workflow.png | |
| 210 | |
| 211 ----------- | |
| 212 Input files | |
| 213 ----------- | |
| 214 | |
| 215 +--------------------------------+------------+ | |
| 216 | Parameter: label | Format | | |
| 217 +================================+============+ | |
| 218 | Data matrix | Tabular | | |
| 219 +--------------------------------+------------+ | |
| 220 | Sample metadata | Tabular | | |
| 221 +--------------------------------+------------+ | |
| 222 | Variable metadata | Tabular | | |
| 223 +--------------------------------+------------+ | |
| 224 | User table file | Tabular | | |
| 225 +--------------------------------+------------+ | |
| 226 | |
| 227 | |
| 228 ---------- | |
| 229 Parameters | |
| 230 ---------- | |
| 231 | |
| 232 **Choice of your input files** | |
| 233 | |
| 234 | **variableMetadata** | |
| 235 | | |
| 236 | For example, the "variableMetadata.tsv" tabular file generated by the CAMERA.annotate step of the workflow. | |
| 237 | This table must contain in particular two columns named "**pcgroup**" and "**rt**" (it is case-sensitive). | |
| 238 | | |
| 239 | **dataMatrix** | |
| 240 | | |
| 241 | For example, the "dataMatrix.tsv" tabular file generated by the CAMERA.annotate step of the workflow. | |
| 242 | | |
| 243 | **sampleMetadata** | |
| 244 | | |
| 245 | For example, the tabular file with the samples metadata generated by the xcmsSet step: one sample per line and at least two columns: ids and one variable. | |
| 246 | | |
| 247 | **user table** | |
| 248 | | |
| 249 | Tabular containing intensities where your variables (metabolites) are in columns (for example a transposition of your datamatrix file) | |
| 250 | |
| 251 **Correlation threshold for pcgroup** *(metabolomic workflow only)* | |
| 252 | |
| 253 The threshold value that determines whether two metabolites of the same pcgroup are considered correlated, after the creation of the global correlation matrix. | |
| 254 If you do not want to use the intra-pcgroup filter (see "corrdel" function in the description section), put this threshold to 1 and all ions will be kept. | |
| 255 | |
| 256 **Choice of the correlation method** | |
| 257 | |
| 258 Choose the correlation method (pearson, kendall or spearman). | |
| 259 | |
| 260 **Cytoscape correlation threshold** | |
| 261 | |
| 262 Choose a threshold value for selecting edges (i.e. correlations between metabolites) that will be exported to the Cytoscape sif format file. | |
| 263 | |
| 264 ------------ | |
| 265 Output files | |
| 266 ------------ | |
| 267 | |
| 268 | |
| 269 | |
| 270 **sorted_variableMetadata.tsv** *(metabolomic workflow only)* | |
| 271 | |
| 272 | A tabular file which: | |
| 273 | 1) contains the original variable metadata columns | |
| 274 | 2) is sorted by the pcgroup column | |
| 275 | 3) contains a new column "signal_moy" (mean of all the signal values of the metabolites by sample) | |
| 276 | 4) (depending on parameters) contains a "suppress" column | |
| 277 | |
| 278 **correlation_matrix_selected.tsv** *(metabolomic workflow only)* | |
| 279 | |
| 280 | A correlation matrix containing only the metabolites selected in each pcgroup (metabolites tagged as "DEL" in "suppress" column are removed), | |
| 281 | completed with two columns "rtmed" and "signal_moy". | |
| 282 | |
| 283 **sif_table.tsv** | |
| 284 | |
| 285 | A tabular file (three columns: Metabolite1, Correlation coefficient, Metabolite 2) that can be used in Cytoscape. | |
| 286 | |
| 287 ------ | |
| 288 | |
| 289 .. class:: infomark | |
| 290 | |
| 291 The output **selected_metabolites_dataMatrix.tsv** is a tabular file. You can continue your analysis using it for example in the following statistical tools: | |
| 292 | Hierarchical Clustering | |
| 293 | ANOVA | |
| 294 | |
| 295 | |
| 296 --------------------------------------------------- | |
| 297 | |
| 298 Changelog/News | |
| 299 -------------- | |
| 300 | |
| 301 | |
| 302 **Version 1.0.1+galaxy0 - 10/12/2020** | |
| 303 | |
| 304 - Update of some of the outputs' formats to match standard W4M table format | |
| 305 - Standard output (stdout) log improvement | |
| 306 - Change of testing data for faster job running for tests | |
| 307 - Fix: generation of the outputs for "Your table file" option | |
| 308 | |
| 309 **Version 1.0.1 - 20/09/2016** | |
| 310 | |
| 311 - TEST: refactoring to pass functional test using conda dependencies | |
| 312 - Help improvement | |
| 313 | |
| 314 | |
| 315 **Version 20141118 - 18/11/2014** | |
| 316 | |
| 317 </help> | |
| 318 <citations> <!-- NOTE(review): this DOI presumably points to the Workflow4Metabolomics publication; confirm. --> | |
| 319 <citation type="doi">10.1093/bioinformatics/btu813</citation> | |
| 320 </citations> | |
| 321 | |
| 322 </tool> |
