comparison dimet_differential_analysis.xml @ 0:c46d33411495 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit abca848510cb4ac8d09d95634147626ea578cdf0
author iuc
date Tue, 10 Oct 2023 11:52:44 +0000
parents
children e45e03a99d56
comparison
equal deleted inserted replaced
-1:000000000000 0:c46d33411495
1 <tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
2 <description>
3 Differential analysis of tracer metabolomics data comparing two groups (by DIMet)
4 </description>
<!-- @TOOL_LABEL@ and @EXECUTABLE@ are defined here and substituted into the tool name and id on the root element;
     @EXECUTABLE@ is also referenced in the help text.
     @TOOL_VERSION@ and @VERSION_SUFFIX@ are not defined in this file; presumably they come from macros.xml (TODO confirm). -->
5 <macros>
6 <token name="@TOOL_LABEL@">differential analysis</token>
7 <token name="@EXECUTABLE@">differential_analysis</token>
8 <import>macros.xml</import>
9 </macros>
10 <expand macro="requirements"/>
<!-- Command template for the DIMet differential analysis.
     The @INIT_*@ and @REMOVE_CONFIG@ tokens are not defined in this file (presumably in macros.xml; TODO confirm).
     `python -m dimet` is then called with a config path under $__new_file_path__ and a list of `++key=value` overrides.
     Each override is passed as one single quoted shell word, so a substituted value never sits outside the quotes.
     The metadata override is emitted only when metadata_path is set, and exactly one measures override is emitted
     according to data_input.data_input_selector (the final #else branch handles the absolute isotopologues file). -->
11 <command detect_errors="exit_code"><![CDATA[
12 @INIT_CONFIG@
13 @INIT_DIFF_ANALYSIS@
14 @INIT_STAT_TEST@
15 @INIT_GROUPS@
16 @INIT_COMPARISONS@
17 HYDRA_FULL_ERROR=1 python -m dimet
18 -cp '$__new_file_path__/config'
19 '++hydra.run.dir=differential_analysis'
20 '++figure_path=figures'
21 '++table_path=tables'
22 '++analysis={
23 dataset:{
24 _target_: dimet.data.DatasetConfig,
25 name: "I am a synthetic data example"
26 },
27 method:{
28 _target_: dimet.method.DifferentialAnalysisConfig,
29 label: "differential_analysis",
30 name: "Pairwise computation of statistical differences",
31 draw_ellipses: null,
32 run_iris_demo: false
33 },
34 label: differential-analysis-example2
35 }'
36 '++analysis.method.qualityDistanceOverSpan=${qualityDistanceOverSpan}'
37 '++analysis.dataset.label='
38 '++analysis.timepoints=${timepoints}'
39 '++analysis.comparisons=${comparisons}'
40 '++analysis.method.statistical_test=${statistical_test}'
41 '++analysis.method.grouping=${groups}'
42 '++analysis.method.correction_method=${correction_method}'
43 '++analysis.method.impute_values=${impute_values}'
44 '++analysis.statistical_test=${statistical_test}'
45 '++analysis.dataset.subfolder='
46 '++analysis.dataset.conditions=${conds}'
47 #if $metadata_path:
48 '++analysis.dataset.metadata=metadata'
49 #end if
50 #if str( $data_input.data_input_selector ) == "abundance":
51 #if $data_input.abundance_file:
52 '++analysis.dataset.abundances=abundance'
53 #end if
54 #elif str( $data_input.data_input_selector ) == "mean_enrichment":
55 #if $data_input.me_or_frac_contrib_file:
56 '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
57 #end if
58 #elif str( $data_input.data_input_selector ) == "isotop_prop":
59 #if $data_input.isotop_prop_file:
60 '++analysis.dataset.isotopologue_proportions=isotop_prop'
61 #end if
62 #else
63 #if $data_input.isotop_abs_file:
64 '++analysis.dataset.isotopologues=isotop_abs'
65 #end if
66 #end if
67 @REMOVE_CONFIG@
68 ]]></command>
<!-- All inputs except qualityDistanceOverSpan are expanded from macros that are not visible in this file.
     qualityDistanceOverSpan is a float limited to the range -1.0 to -0.1 (default -0.3); the command section
     forwards it as the analysis.method.qualityDistanceOverSpan override. -->
69 <inputs>
70 <expand macro="input_parameters_diff_analysis"/>
71 <expand macro="conditions"/>
72 <expand macro="timepoint"/>
73 <expand macro="correction_method"/>
74 <param name="qualityDistanceOverSpan" type="float" min="-1.0" max="-0.1" value="-0.3" label="quality Distance Over Span" help="Default value is -0.3."/>
75 </inputs>
76
<!-- The result tables are collected from the "tables" directory (the command section sets table_path to the same name)
     into a single list collection named "report"; every discovered file is typed as tabular.
     NOTE(review): the __designation__ pattern is expected to use each file name as the element identifier; confirm against the Galaxy schema docs. -->
77 <outputs>
78 <collection name="report" type="list">
79 <discover_datasets pattern="__designation__" directory="tables" format="tabular"/>
80 </collection>
81 </outputs>
<!-- Single functional test: raw abundances plus metadata, bonferroni correction, statistical test "Tt",
     conditions Control and L-Cycloserine, timepoints T0 and T2h.
     The "report" collection must contain exactly 4 tabular elements; the expected file names cover the
     two compartments (cell, med) at each of the two timepoints. -->
82 <tests>
83 <test>
84 <param name="data_input_selector" value="abundance" />
85 <param name="abundance_file" ftype="tabular" value="rawAbundances.csv"/>
86 <param name="metadata_path" ftype="tabular" value="example2_metadata.csv"/>
87 <param name="correction_method" value="bonferroni"/>
88 <param name="stat_test" value="Tt"/>
89 <param name="qualityDistanceOverSpan" value="-0.3"/>
90 <param name="conditions" value='Control,L-Cycloserine'/>
91 <param name="timepoint" value='T0,T2h'/>
92 <output_collection name="report" type="list" count="4">
93 <element file="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" name="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
94 <element file="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" name="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/>
95 <element file="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" name="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
96 <element file="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" name="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/>
97 </output_collection>
98 </test>
99 </tests>
100 <help><![CDATA[
101
102 This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).
103
104 DIMet differential analysis compares groups to evaluate for statistical differences, in a **pairwise** mode.
105 This pairwise mode accepts one or several defined comparison(s), that will run in a single execution.
106 In this way, you do not need to re-upload your data several times;
107 instead, you upload your data once and compose a list of comparisons:
108
109 - Tumoral,T0 vs Control,T0
110
111 - Tumoral,T2 vs Control,T2
112
113 - Tumoral,T24 vs Control,T24
114
115 - ...
116
117 then DIMet differential analysis will execute them -one by one- automatically.
118
119
120 **Input data files**
121
122 This tool accepts at most 5 tab-delimited .csv files as inputs. There are two types of files:
123
124 - The measures' (or quantifications') files, that can be of 4 types.
125
126 - The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.
127
128 For running DIMet @EXECUTABLE@ you need **at least one file** of measures:
129
130 - The total **abundances** (of the metabolites) file
131
132 - The mean **enrichment** or labelled fractional contributions
133
134 - The **isotopologues** absolute values files (optional)
135
136 - The **isotopologue proportions** file (optional)
137
138 and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
139
140
141 **Measures' files**
142
143 The measures' files must be organized as matrices:
144
145 - The first column must contain Metabolite IDs that are unique (not repeated) within the file.
146
147 - The rest of the columns correspond to the samples
148
149 - The rows correspond to the metabolites
150
151 - The values must be tab separated, with the first row containing the sample/column labels.
152
153 See the following examples of measures files:
154
155
156 Example - Metabolites **abundances**:
157
158 =============== ================== ================== ================== ================== ================== ==================
159 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
160 =============== ================== ================== ================== ================== ================== ==================
161 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956
162 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051
163 Glc6P 2310 2142 2683 1683 012532068 1252172
164 Gly3P 399298 991656565 525195 6365231 89451625 4952651963
165 IsoCit 0 0 0 84915613 856236 954651610
166 =============== ================== ================== ================== ================== ================== ==================
167
168 Example - mean **enrichment** or labeled fractional contributions:
169
170 =============== ================== ================== ================== ================== ================== ==================
171 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
172 =============== ================== ================== ================== ================== ================== ==================
173 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9
174 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68
175 Glc6P 0.06 0.66 2683 0.06 2068 2172
176 Gly3P 0.06 0.06 0.06 1 5 3
177 IsoCit 0.06 1 0.49 0.36 6 10
178 =============== ================== ================== ================== ================== ================== ==================
179
180 Example - **Isotopologues**
181
182 =============== ================== ================== ================== ================== ================== ==================
183 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
184 =============== ================== ================== ================== ================== ================== ==================
185 2_3-PG_m+0 206171.4626 285834.0353 36413.27637 27367.17784 6171.4626 119999
186 2_3-PG_m+1 123 432 101 127 206171.4626 119999
187 2_3-PG_m+2 133780.182 161461.2364 182631.3947 132170.3807 358749.348 848754.36
188 2_3-PG_m+3 8358749.348 10271010.45 10505228.3 8376820.028 62163.30727 1088.8963
189 2-OHGLu_m+0 5550339.322 6072872.833 3855047.791 3216178.72 8358749.348 10271010.45
190 2-OHGLu_m+1 0.0 0.0 0.0 0.0 206171.4626 285834.0353
191 =============== ================== ================== ================== ================== ================== ==================
192
193
194 Example - **Isotopologue proportions**:
195
196 =============== ================== ================== ================== ================== ================== ==================
197 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
198 =============== ================== ================== ================== ================== ================== ==================
199 2_3-PG_m+0 0.023701408 0.026667837 0.003395407 0.05955 0.034383527 0.12
200 2_3-PG_m+1 0.0 0.0 0.0 0.0 0.4 0.12
201 2_3-PG_m+2 0.015379329 0.01506 0.017029723 0.35483229 0.54131313 0.743
202 2_3-PG_m+3 0.960919263 0.958268099 0.97957487 0.581310816 0.017029723 0.017
203 2-OHGLu_m+0 0.972778716 0.960016157 0.238843937 0.234383527 0.9998888 0.015064063
204 2-OHGLu_m+1 0.0 0.0 0.0 0.0 0.0001112 0.960919263
205 =============== ================== ================== ================== ================== ================== ==================
206
207
208
209 **Metadata File Information**
210
211 Provide a tab-separated file that has the names of the samples in the first column and one header row.
212 Column names must be exactly in this order:
213
214 name_to_plot
215 condition
216 timepoint
217 timenum
218 compartment
219 original_name
220
221
222 Example **Metadata File**:
223
224
225 ==================== =============== ============= ============ ================ =================
226 **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name**
227 -------------------- --------------- ------------- ------------ ---------------- -----------------
228 Control_cell_T0-1 Control T0 0 cell MCF001089_TD01
229 Control_cell_T0-2 Control T0 0 cell MCF001089_TD02
230 Control_cell_T0-3 Control T0 0 cell MCF001089_TD03
231 Tumoral_cell_T0-1 Tumoral T0 0 cell MCF001089_TD04
232 Tumoral_cell_T0-2 Tumoral T0 0 cell MCF001089_TD05
233 Tumoral_cell_T0-3 Tumoral T0 0 cell MCF001089_TD06
234 Tumoral_cell_T24-1 Tumoral T24 24 cell MCF001089_TD07
235 Tumoral_cell_T24-2 Tumoral T24 24 cell MCF001089_TD08
236 Tumoral_cell_T24-3 Tumoral T24 24 cell MCF001090_TD01
237 Control_med_T24-1 Control T24 24 med MCF001090_TD02
238 Control_med_T24-2 Control T24 24 med MCF001090_TD03
239 Tumoral_med_T24-1 Tumoral T24 24 med MCF001090_TD04
240 Tumoral_med_T24-2 Tumoral T24 24 med MCF001090_TD05
241 Control_med_T0-1 Control T0 0 med MCF001090_TD06
242 Tumoral_med_T0-1 Tumoral T0 0 med MCF001090_TD07
243 Tumoral_med_T0-2 Tumoral T0 0 med MCF001090_TD08
244 ==================== =============== ============= ============ ================ =================
245
246
247 The column **original_name** must have the names of the samples as given in your data.
248
249 The column **name_to_plot** must have the names as you want them to be displayed (or set identical to original_name if you prefer). Setting
250 meaningful names is the better choice, as they are used to display the results.
251
252 The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (that is, without letters such as "T", "t", "s", "h",
253 or any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).
254
255 The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the
256 compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.
257
258
259 **Running the analysis**
260
261 You can specify how you want your analysis to be executed, with the parameters:
262
263 - **conditions**: the conditions present in your data, specifying in first place your CONTROL condition (this ordering is crucial to make the comparisons in a coherent sense).
264
265 - **comparisons** : the pairs of [condition, timepoint] groups to compare
266
267 - **datatypes** : the measures type(s) that you want to run
268
269 - **statistical_test** : choose, by type of measure, the specific statistical test to be applied.
270
271 Kruskal-Wallis, Mann-Whitney, Wilcoxon’s signed rank test, Wilcoxon’s rank sum test,
272 t-test, and permutation test are currently offered (we use the trusted functions from scipy library https://docs.scipy.org/doc/scipy/reference/stats.html).
273
274 For the permutation test, we have established as test statistic, the absolute difference of geometric means of the two compared groups.
275
276 - **qualityDistanceOverSpan**: a normalized distance between the intervals of values of the compared groups, that is the cutoff for
277
278 considering a minimal acceptable "separation", and therefore, to be suitable for statistical testing. A 'distance/span' == 1 is a perfect separation,
279 whereas if 'distance/span' < 0 there is no separation.
280 Use with caution if your intra-group values are highly dispersed. Default is -0.3 (not stringent).
281
282 - **correction_method** : one of the methods for multiple testing correction available in statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).
283
284 There exist hints on use that will guide you, next to the parameters.
285
286
287 **Available data for testing**
288
289 You can test our tool with the data from our manuscript https://zenodo.org/record/8378887 (the pertinent
290 files for you are located in the subfolders inside the data folder).
291 You can also use the minimal data examples from https://zenodo.org/record/8380706
292
293 ]]>
294 </help>
295 <expand macro="citations" />
296 </tool>