Mercurial > repos > iuc > dimet_differential_analysis
comparison dimet_differential_analysis.xml @ 0:c46d33411495 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit abca848510cb4ac8d09d95634147626ea578cdf0
author | iuc |
---|---|
date | Tue, 10 Oct 2023 11:52:44 +0000 |
parents | |
children | e45e03a99d56 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c46d33411495 |
---|---|
1 <tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05"> | |
2 <description> | |
3 Differential analysis of tracer metabolomics data comparing two groups (by DIMet) | |
4 </description> | |
5 <macros> | |
<!-- Tool-specific tokens: the label and the executable name are substituted into the name and id attributes of the root tool element, and the executable name is also referenced in the help text. Shared definitions are imported from macros.xml. --> | |
6 <token name="@TOOL_LABEL@">differential analysis</token> | |
7 <token name="@EXECUTABLE@">differential_analysis</token> | |
8 <import>macros.xml</import> | |
9 </macros> | |
10 <expand macro="requirements"/> | |
11 <command detect_errors="exit_code"><![CDATA[ | |
## Setup snippets below are expanded from shared macro tokens (their content is defined outside this file). | |
## DIMet is then run through Hydra; every ++key=value argument adds or overrides a configuration entry. | |
12 @INIT_CONFIG@ | |
13 @INIT_DIFF_ANALYSIS@ | |
14 @INIT_STAT_TEST@ | |
15 @INIT_GROUPS@ | |
16 @INIT_COMPARISONS@ | |
17 HYDRA_FULL_ERROR=1 python -m dimet | |
18 -cp '$__new_file_path__/config' | |
19 '++hydra.run.dir=differential_analysis' | |
20 '++figure_path=figures' | |
21 '++table_path=tables' | |
22 '++analysis={ | |
23 dataset:{ | |
24 _target_: dimet.data.DatasetConfig, | |
25 name: "I am a synthetic data example" | |
26 }, | |
27 method:{ | |
28 _target_: dimet.method.DifferentialAnalysisConfig, | |
29 label: "differential_analysis", | |
30 name: "Pairwise computation of statistical differences", | |
31 draw_ellipses: null, | |
32 run_iris_demo: false | |
33 }, | |
34 label: differential-analysis-example2 | |
35 }' | |
## The substituted value is kept inside the single-quoted argument, like all the other overrides. | |
36 '++analysis.method.qualityDistanceOverSpan=${qualityDistanceOverSpan}' | |
37 '++analysis.dataset.label=' | |
38 '++analysis.timepoints=${timepoints}' | |
39 '++analysis.comparisons=${comparisons}' | |
40 '++analysis.method.statistical_test=${statistical_test}' | |
41 '++analysis.method.grouping=${groups}' | |
42 '++analysis.method.correction_method=${correction_method}' | |
43 '++analysis.method.impute_values=${impute_values}' | |
44 '++analysis.statistical_test=${statistical_test}' | |
45 '++analysis.dataset.subfolder=' | |
46 '++analysis.dataset.conditions=${conds}' | |
47 #if $metadata_path: | |
48 '++analysis.dataset.metadata=metadata' | |
49 #end if | |
## Only the measure type chosen in the input selector is passed on; any selector value other than the three tested below falls through to the isotopologue absolute values branch. | |
50 #if str( $data_input.data_input_selector ) == "abundance": | |
51 #if $data_input.abundance_file: | |
52 '++analysis.dataset.abundances=abundance' | |
53 #end if | |
54 #elif str( $data_input.data_input_selector ) == "mean_enrichment": | |
55 #if $data_input.me_or_frac_contrib_file: | |
56 '++analysis.dataset.mean_enrichment=me_or_frac_contrib' | |
57 #end if | |
58 #elif str( $data_input.data_input_selector ) == "isotop_prop": | |
59 #if $data_input.isotop_prop_file: | |
60 '++analysis.dataset.isotopologue_proportions=isotop_prop' | |
61 #end if | |
62 #else | |
63 #if $data_input.isotop_abs_file: | |
64 '++analysis.dataset.isotopologues=isotop_abs' | |
65 #end if | |
66 #end if | |
67 @REMOVE_CONFIG@ | |
68 ]]></command> | |
69 <inputs> | |
<!-- Most parameters are pulled in from shared macros whose definitions are not visible in this file. Only qualityDistanceOverSpan is declared inline: a float restricted to the range -1.0 to -0.1, defaulting to -0.3. --> | |
70 <expand macro="input_parameters_diff_analysis"/> | |
71 <expand macro="conditions"/> | |
72 <expand macro="timepoint"/> | |
73 <expand macro="correction_method"/> | |
74 <param name="qualityDistanceOverSpan" type="float" min="-1.0" max="-0.1" value="-0.3" label="quality Distance Over Span" help="Default value is -0.3."/> | |
75 </inputs> | |
76 | |
77 <outputs> | |
<!-- Files discovered in the "tables" directory are gathered as tabular elements of the "report" list collection; the test below expects each element to be named after its file. --> | |
78 <collection name="report" type="list"> | |
79 <discover_datasets pattern="__designation__" directory="tables" format="tabular"/> | |
80 </collection> | |
81 </outputs> | |
82 <tests> | |
<!-- Single test: abundance input with bonferroni correction and the "Tt" statistical test (presumably the t-test; confirm against the macro that defines stat_test). Four tabular tables are expected, one per compartment (cell, med) and timepoint (T0, T2h), each comparing Control with L-Cycloserine. --> | |
83 <test> | |
84 <param name="data_input_selector" value="abundance" /> | |
85 <param name="abundance_file" ftype="tabular" value="rawAbundances.csv"/> | |
86 <param name="metadata_path" ftype="tabular" value="example2_metadata.csv"/> | |
87 <param name="correction_method" value="bonferroni"/> | |
88 <param name="stat_test" value="Tt"/> | |
89 <param name="qualityDistanceOverSpan" value="-0.3"/> | |
90 <param name="conditions" value='Control,L-Cycloserine'/> | |
91 <param name="timepoint" value='T0,T2h'/> | |
92 <output_collection name="report" type="list" count="4"> | |
93 <element file="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" name="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/> | |
94 <element file="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" name="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/> | |
95 <element file="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" name="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/> | |
96 <element file="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" name="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/> | |
97 </output_collection> | |
98 </test> | |
99 </tests> | |
100 <help><![CDATA[ | |
101 | |
102 This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/). | |
103 | |
104 DIMet differential analysis compares groups to evaluate for statistical differences, in a **pairwise** mode. | |
105 This pairwise mode accepts one or several defined comparison(s), that will run in a single execution. | |
106 This way, you do not need to re-upload your data several times; | |
107 instead, you upload your data once and compose a list of comparisons: | |
108 | |
109 - Tumoral,T0 vs Control,T0 | |
110 | |
111 - Tumoral,T2 vs Control,T2 | |
112 | |
113 - Tumoral,T24 vs Control,T24 | |
114 | |
115 - ... | |
116 | |
117 then DIMet differential analysis will execute them -one by one- automatically. | |
118 | |
119 | |
120 **Input data files** | |
121 | |
122 This tool accepts at most 5 tab-delimited .csv files as inputs. There are two types of files: | |
123 | |
124 - The measures' (or quantifications') files, that can be of 4 types. | |
125 | |
126 - The metadata, a unique file with the description of the samples in your measures' files. This is compulsory. | |
127 | |
128 For running DIMet @EXECUTABLE@ you need **at least one file** of measures: | |
129 | |
130 - The total **abundances** (of the metabolites) file | |
131 | |
132 - The mean **enrichment** or labelled fractional contributions | |
133 | |
134 - The **isotopologues** absolute values files (optional) | |
135 | |
136 - The **isotopologue proportions** file (optional) | |
137 | |
138 and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**. | |
139 | |
140 | |
141 **Measures' files** | |
142 | |
143 The measures' files must be organized as matrices: | |
144 | |
145 - The first column must contain Metabolite IDs that are unique (not repeated) within the file. | |
146 | |
147 - The rest of the columns correspond to the samples | |
148 | |
149 - The rows correspond to the metabolites | |
150 | |
151 - The values must be tab separated, with the first row containing the sample/column labels. | |
152 | |
153 See the following examples of measures files: | |
154 | |
155 | |
156 Example - Metabolites **abundances**: | |
157 | |
158 =============== ================== ================== ================== ================== ================== ================== | |
159 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** | |
160 =============== ================== ================== ================== ================== ================== ================== | |
161 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956 | |
162 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051 | |
163 Glc6P 2310 2142 2683 1683 012532068 1252172 | |
164 Gly3P 399298 991656565 525195 6365231 89451625 4952651963 | |
165 IsoCit 0 0 0 84915613 856236 954651610 | |
166 =============== ================== ================== ================== ================== ================== ================== | |
167 | |
168 Example - mean **enrichment** or labeled fractional contributions: | |
169 | |
170 =============== ================== ================== ================== ================== ================== ================== | |
171 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** | |
172 =============== ================== ================== ================== ================== ================== ================== | |
173 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9 | |
174 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68 | |
175 Glc6P 0.06 0.66 2683 0.06 2068 2172 | |
176 Gly3P 0.06 0.06 0.06 1 5 3 | |
177 IsoCit 0.06 1 0.49 0.36 6 10 | |
178 =============== ================== ================== ================== ================== ================== ================== | |
179 | |
180 Example - **Isotopologues** | |
181 | |
182 =============== ================== ================== ================== ================== ================== ================== | |
183 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** | |
184 =============== ================== ================== ================== ================== ================== ================== | |
185 2_3-PG_m+0 206171.4626 285834.0353 36413.27637 27367.17784 6171.4626 119999 | |
186 2_3-PG_m+1 123 432 101 127 206171.4626 119999 | |
187 2_3-PG_m+2 133780.182 161461.2364 182631.3947 132170.3807 358749.348 848754.36 | |
188 2_3-PG_m+3 8358749.348 10271010.45 10505228.3 8376820.028 62163.30727 1088.8963 | |
189 2-OHGLu_m+0 5550339.322 6072872.833 3855047.791 3216178.72 8358749.348 10271010.45 | |
190 2-OHGLu_m+1 0.0 0.0 0.0 0.0 206171.4626 285834.0353 | |
191 =============== ================== ================== ================== ================== ================== ================== | |
192 | |
193 | |
194 Example - **Isotopologue proportions**: | |
195 | |
196 =============== ================== ================== ================== ================== ================== ================== | |
197 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** | |
198 =============== ================== ================== ================== ================== ================== ================== | |
199 2_3-PG_m+0 0.023701408 0.026667837 0.003395407 0.05955 0.034383527 0.12 | |
200 2_3-PG_m+1 0.0 0.0 0.0 0.0 0.4 0.12 | |
201 2_3-PG_m+2 0.015379329 0.01506 0.017029723 0.35483229 0.54131313 0.743 | |
202 2_3-PG_m+3 0.960919263 0.958268099 0.97957487 0.581310816 0.017029723 0.017 | |
203 2-OHGLu_m+0 0.972778716 0.960016157 0.238843937 0.234383527 0.9998888 0.015064063 | |
204 2-OHGLu_m+1 0.0 0.0 0.0 0.0 0.0001112 0.960919263 | |
205 =============== ================== ================== ================== ================== ================== ================== | |
206 | |
207 | |
208 | |
209 **Metadata File Information** | |
210 | |
211 Provide a tab-separated file that has the names of the samples in the first column and one header row. | |
212 Column names must be exactly in this order: | |
213 | |
214 name_to_plot | |
215 condition | |
216 timepoint | |
217 timenum | |
218 compartment | |
219 original_name | |
220 | |
221 | |
222 Example **Metadata File**: | |
223 | |
224 | |
225 ==================== =============== ============= ============ ================ ================= | |
226 **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name** | |
227 -------------------- --------------- ------------- ------------ ---------------- ----------------- | |
228 Control_cell_T0-1 Control T0 0 cell MCF001089_TD01 | |
229 Control_cell_T0-2 Control T0 0 cell MCF001089_TD02 | |
230 Control_cell_T0-3 Control T0 0 cell MCF001089_TD03 | |
231 Tumoral_cell_T0-1 Tumoral T0 0 cell MCF001089_TD04 | |
232 Tumoral_cell_T0-2 Tumoral T0 0 cell MCF001089_TD05 | |
233 Tumoral_cell_T0-3 Tumoral T0 0 cell MCF001089_TD06 | |
234 Tumoral_cell_T24-1 Tumoral T24 24 cell MCF001089_TD07 | |
235 Tumoral_cell_T24-2 Tumoral T24 24 cell MCF001089_TD08 | |
236 Tumoral_cell_T24-3 Tumoral T24 24 cell MCF001090_TD01 | |
237 Control_med_T24-1 Control T24 24 med MCF001090_TD02 | |
238 Control_med_T24-2 Control T24 24 med MCF001090_TD03 | |
239 Tumoral_med_T24-1 Tumoral T24 24 med MCF001090_TD04 | |
240 Tumoral_med_T24-2 Tumoral T24 24 med MCF001090_TD05 | |
241 Control_med_T0-1 Control T0 0 med MCF001090_TD06 | |
242 Tumoral_med_T0-1 Tumoral T0 0 med MCF001090_TD07 | |
243 Tumoral_med_T0-2 Tumoral T0 0 med MCF001090_TD08 | |
244 ==================== =============== ============= ============ ================ ================= | |
245 | |
246 | |
247 The column **original_name** must have the names of the samples as given in your data. | |
248 | |
249 The column **name_to_plot** must contain the sample names as you want them displayed (or set them identical to original_name if you prefer). Choosing | |
250 meaningful names is the better choice, as they are used to display the results. | |
251 | |
252 The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (that is, without letters such as "T", "t", "s", "h", | |
253 or any other symbol). Make sure these time numbers are in the same units (but do not write the units here!). | |
254 | |
255 The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the | |
256 compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column. | |
257 | |
258 | |
259 **Running the analysis** | |
260 | |
261 You can specify how you want your analysis to be executed using the following parameters: | |
262 | |
263 - **conditions**: the conditions present in your data, listing your CONTROL condition first (this ordering is crucial for the comparisons to be made in a consistent direction). | |
264 | |
265 - **comparisons** : the pairs of [condition, timepoint] groups to compare | |
266 | |
267 - **datatypes** : the measures type(s) that you want to run | |
268 | |
269 - **statistical_test** : choose, by type of measure, the specific statistical test to be applied. | |
270 | |
271 Kruskal-Wallis, Mann-Whitney, Wilcoxon’s signed rank test, Wilcoxon’s rank sum test, | |
272 t-test, and permutation test are currently offered (we use the trusted functions from scipy library https://docs.scipy.org/doc/scipy/reference/stats.html). | |
273 | |
274 For the permutation test, we have established as test statistic, the absolute difference of geometric means of the two compared groups. | |
275 | |
276 - **qualityDistanceOverSpan**: a normalized distance between the intervals of values of the compared groups, that is the cutoff for | |
277 | |
278 considering a minimal acceptable "separation", and therefore, to be suitable for statistical testing. A 'distance/span' == 1 is a perfect separation, | |
279 whereas if 'distance/span' < 0 there is no separation. | |
280 Use with caution when your intra-group values are widely dispersed. Default is -0.3 (not stringent). | |
281 | |
282 - **correction_method** : one of the methods for multiple testing correction available in statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html). | |
283 | |
284 Usage hints are displayed next to the parameters to guide you. | |
285 | |
286 | |
287 **Available data for testing** | |
288 | |
289 You can test our tool with the data from our manuscript https://zenodo.org/record/8378887 (the pertinent | |
290 files for you are located in the subfolders inside the data folder). | |
291 You can also use the minimal data examples from https://zenodo.org/record/8380706 | |
292 | |
293 ]]> | |
294 </help> | |
295 <expand macro="citations" /> | |
296 </tool> |