comparison dimet_differential_multigroup_analysis.xml @ 0:e88cf5618b40 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit abca848510cb4ac8d09d95634147626ea578cdf0
author iuc
date Tue, 10 Oct 2023 11:53:22 +0000
parents
children b581ea4908ae
comparison
equal deleted inserted replaced
-1:000000000000 0:e88cf5618b40
1 <tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
2 <description>
3 Differential analysis of 3 or more chosen groups of tracer metabolomics data (by DIMet)
4 </description>
5 <macros>
6 <token name="@TOOL_LABEL@">differential multigroup analysis</token>
7 <token name="@EXECUTABLE@">differential_multigroup_analysis</token>
8 <import>macros.xml</import>
9 </macros>
10 <expand macro="requirements"/>
11 <command detect_errors="exit_code"><![CDATA[
12 @INIT_CONFIG@
13 @INIT_DIFF_MULTIGROUP_ANALYSIS@
14 @INIT_STAT_TEST@
15 @INIT_DATATYPES@
16 @INIT_GROUPS@
17 @INIT_CONDITIONS@
18 @INIT_TIMEPOINTS@
19 @INIT_MULTIGROUP_COMPARISONS@
20 HYDRA_FULL_ERROR=1 python -m dimet
21 -cp '$__new_file_path__/config'
22 '++hydra.run.dir=multi_group_comparison'
23 '++figure_path=figures'
24 '++table_path=tables'
25 '++analysis={
26 dataset:{
27 _target_:dimet.data.DatasetConfig,
28 name: "Galaxy DIMet run"
29 },
30 method:{
31 _target_: dimet.method.MultiGroupComparisonConfig,
32 label: multi_group_comparison,
33 name: "Multi group statistical comparison using Kruskal-Wallis test"
34 },
35 label: multi_group_comparison,
36 datatypes:${datatypes_avail}
37 }'
38 '++analysis.method.datatypes=${datatypes_avail}'
39 '++analysis.method.qualityDistanceOverSpan=${qualityDistanceOverSpan}'
40 '++analysis.timepoints=${timepoints}'
41 '++analysis.conditions=${comparisons}'
42 '++analysis.dataset.label='
43 '++analysis.statistical_test=${statistical_test}'
44 '++analysis.method.correction_method=${correction_method}'
45 '++analysis.method.impute_values=${impute_values}'
46 '++analysis.dataset.subfolder='
47 '++analysis.method.grouping=${groups}'
48 '++analysis.dataset.conditions=${conds}'
49 #if $metadata_path:
50 '++analysis.dataset.metadata=metadata'
51 #end if
52 #if str( $data_input.data_input_selector ) == "abundance":
53 #if $data_input.abundance_file:
54 '++analysis.dataset.abundances=abundance'
55 #end if
56 #elif str( $data_input.data_input_selector ) == "mean_enrichment":
57 #if $data_input.me_or_frac_contrib_file:
58 '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
59 #end if
60 #elif str( $data_input.data_input_selector ) == "isotop_prop":
61 #if $data_input.isotop_prop_file:
62 '++analysis.dataset.isotopologue_proportions=isotop_prop'
63 #end if
64 #else
65 #if $data_input.isotop_abs_file:
66 '++analysis.dataset.isotopologues=isotop_abs'
67 #end if
68 #end if
69 @REMOVE_CONFIG@
70 ]]></command>
71 <inputs>
72 <expand macro="input_parameters_diff_analysis"/>
73 <expand macro="conditions_multigroup"/>
74 <expand macro="timepoint_multigroup"/>
75 <expand macro="compartments"/>
76 <expand macro="correction_method"/>
77 <param name="qualityDistanceOverSpan" type="float" min="-1.0" max="-0.1" value="-0.3" label="quality Distance Over Span" help="Default value is -0.3."/>
78 </inputs>
79
80 <outputs>
81 <collection name="report" type="list">
82 <discover_datasets pattern="__designation__" directory="tables" format="tabular"/>
83 </collection>
84 </outputs>
85 <tests>
86 <test>
87 <param name="data_input_selector" value="abundance" />
88 <param name="abundance_file" ftype="tabular" value="rawAbundances3.csv"/>
89 <param name="metadata_path" ftype="tabular" value="example3_metadata.csv"/>
90 <param name="stat_test" value="Tt"/>
91 <param name="correction_method" value="bonferroni"/>
92 <param name="qualityDistanceOverSpan" value="-0.3"/>
93 <param name="conditions" value="Control,Cond1,Cond2"/>
94 <param name="timepoint" value="T0h,T2h"/>
95 <param name="compartments" value="cell"/>
96
97 <output_collection name="report" type="list" count="1">
98 <element file="abundance--cell--multigroup.tsv" name="abundance--cell--multigroup.tsv" ftype="tabular"/>
99 </output_collection>
100 </test>
101 </tests>
102 <help><![CDATA[
103 This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).
104
105 **Input data files**
106
107 This tool computes the Kruskal-Wallis test over 3 or more groups,
108 to evaluate if at least one group is significantly different from the other groups (H0: the medians of all the groups are equal). For illustration see the section **Metadata File Information** which contains three conditions: Control, Core_mass and Edge_tissue, across one single time point, thus 3 groups (number-of-groups-in-my-data = number-of-conditions x number-of-timepoints).
109
110 This tool requires (at max.) 5 tab-delimited .csv files as inputs. There are two types of files:
111
112 - The measures' (or quantifications') files, that can be of 4 types.
113
114 - The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.
115
116 For running DIMet @EXECUTABLE@ you need **at least one** file of measures:
117
118
119 - The total **abundances** (of the metabolites) file
120
121 - The mean **enrichment** or labelled fractional contributions
122
123 - The **isotopologues** absolute values files (optional)
124
125 - The **isotopologue proportions** file (optional)
126
127 and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
128
129 The measure's files must be organized as matrices:
130
131 - The first column must contain Metabolite IDs that are unique (not repeated) within the file.
132
133 - The rest of the columns correspond to the samples
134
135 - The rows correspond to the metabolites
136
137 - The values must be tab separated, with the first row containing the sample/column labels.
138
139 See the following examples of measures' files:
140
141
142 Example - Metabolites **abundances**:
143
144 =============== ================== ================== ================== ================== ================== ==================
145 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
146 =============== ================== ================== ================== ================== ================== ==================
147 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956
148 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051
149 Glc6P 2310 2142 2683 1683 012532068 1252172
150 Gly3P 399298 991656565 525195 6365231 89451625 4952651963
151 IsoCit 0 0 0 84915613 856236 954651610
152 =============== ================== ================== ================== ================== ================== ==================
153
154 Example - mean **enrichment** or labeled fractional contributions:
155
156 =============== ================== ================== ================== ================== ================== ==================
157 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
158 =============== ================== ================== ================== ================== ================== ==================
159 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9
160 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68
161 Glc6P 0.06 0.66 2683 0.06 2068 2172
162 Gly3P 0.06 0.06 0.06 1 5 3
163 IsoCit 0.06 1 0.49 0.36 6 10
164 =============== ================== ================== ================== ================== ================== ==================
165
166 Example - **Isotopologues**
167
168 =============== ================== ================== ================== ================== ================== ==================
169 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
170 =============== ================== ================== ================== ================== ================== ==================
171 2_3-PG_m+0 206171.4626 285834.0353 36413.27637 27367.17784 6171.4626 119999
172 2_3-PG_m+1 123 432 101 127 206171.4626 119999
173 2_3-PG_m+2 133780.182 161461.2364 182631.3947 132170.3807 358749.348 848754.36
174 2_3-PG_m+3 8358749.348 10271010.45 10505228.3 8376820.028 62163.30727 1088.8963
175 2-OHGLu_m+0 5550339.322 6072872.833 3855047.791 3216178.72 8358749.348 10271010.45
176 2-OHGLu_m+1 0.0 0.0 0.0 0.0 206171.4626 285834.0353
177 =============== ================== ================== ================== ================== ================== ==================
178
179
180 Example - **Isotopologue proportions**:
181
182 =============== ================== ================== ================== ================== ================== ==================
183 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
184 =============== ================== ================== ================== ================== ================== ==================
185 2_3-PG_m+0 0.023701408 0.026667837 0.003395407 0.05955 0.034383527 0.12
186 2_3-PG_m+1 0.0 0.0 0.0 0.0 0.4 0.12
187 2_3-PG_m+2 0.015379329 0.01506 0.017029723 0.35483229 0.54131313 0.743
188 2_3-PG_m+3 0.960919263 0.958268099 0.97957487 0.581310816 0.017029723 0.017
189 2-OHGLu_m+0 0.972778716 0.960016157 0.238843937 0.234383527 0.9998888 0.015064063
190 2-OHGLu_m+1 0.0 0.0 0.0 0.0 0.0001112 0.960919263
191 =============== ================== ================== ================== ================== ================== ==================
192
193
194
195 **Metadata File Information**
196
197 Provide a tab-separated file that has the names of the samples in the first column and one header row.
198 Column names must be exactly in this order:
199
200 name_to_plot
201 condition
202 timepoint
203 timenum
204 compartment
205 original_name
206
207
208 Example **Metadata File**:
209
210
211 ==================== =============== ============= ============ ================ =================
212 **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name**
213 -------------------- --------------- ------------- ------------ ---------------- -----------------
214 Control_cell-1 Control T0 0 cell MCF001089_TD01
215 Control_cell-2 Control T0 0 cell MCF001089_TD02
216 Control_cell-3 Control T0 0 cell MCF001089_TD03
217 CoreMass_cell-1 Core_mass T0 0 cell MCF001089_TD04
218 CoreMass_cell-2 Core_mass T0 0 cell MCF001089_TD05
219 CoreMass_cell-3 Core_mass T0 0 cell MCF001089_TD06
220 EdgeTiss_cell-1 Edge_tissue T0 0 cell MCF001089_TD07
221 EdgeTiss_cell-2 Edge_tissue T0 0 cell MCF001089_TD08
222 EdgeTiss_cell-3 Edge_tissue T0 0 cell MCF001089_TD09
223 ==================== =============== ============= ============ ================ =================
224
225
226 The column **original_name** must have the names of the samples as given in your data.
227
228 The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). To set names that are meaningful is a better choice, as we will take them to display the results.
229
230 The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (this means, without letters ("T", "t", "s", "h" etc) nor any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).
231
232 The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.
233
234
235 **Running the analysis**
236
237
238 You can specify how you want your analysis to be executed, with the parameters:
239
240 - **comparisons** : the groups you want to select for comparison in the multigroup analysis
241
242 (Note that **comparisons** parameter in the multigroup analysis does not have the same usage as in the pairwise differential analysis).
243
244 - **datatypes** : the measures type(s) that you want to run
245
246 - **correction_method** : one of the methods for multiple testing correction available in the statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).
247
248 There exist hints on use that will guide you, next to the parameters.
249
250 **Available data for testing**
251
252 You can test our tool with the data from our manuscript https://zenodo.org/record/8378887 (the pertinent
253 files for you are located in the subfolders inside the data folder).
254 You can also use the minimal data examples from https://zenodo.org/record/8380706
255
256 ]]>
257 </help>
258 <expand macro="citations"/>
259 </tool>