Mercurial > repos > iuc > dimet_differential_analysis
diff dimet_differential_analysis.xml @ 0:c46d33411495 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit abca848510cb4ac8d09d95634147626ea578cdf0
author | iuc |
---|---|
date | Tue, 10 Oct 2023 11:52:44 +0000 |
parents | |
children | e45e03a99d56 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dimet_differential_analysis.xml Tue Oct 10 11:52:44 2023 +0000
@@ -0,0 +1,304 @@
+<tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
+    <description>
+        Differential analysis of tracer metabolomics data comparing two groups (by DIMet)
+    </description>
+    <macros>
+        <token name="@TOOL_LABEL@">differential analysis</token>
+        <token name="@EXECUTABLE@">differential_analysis</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!-- Runs `python -m dimet` and passes every setting as a Hydra-style "++key=value" override.
+         The @INIT_*@ and @REMOVE_CONFIG@ tokens are not defined in this file; presumably they
+         are provided by the imported macros.xml (to be confirmed there). -->
+    <command detect_errors="exit_code"><![CDATA[
+    @INIT_CONFIG@
+    @INIT_DIFF_ANALYSIS@
+    @INIT_STAT_TEST@
+    @INIT_GROUPS@
+    @INIT_COMPARISONS@
+    HYDRA_FULL_ERROR=1 python -m dimet
+        -cp '$__new_file_path__/config'
+        '++hydra.run.dir=differential_analysis'
+        '++figure_path=figures'
+        '++table_path=tables'
+        '++analysis={
+            dataset:{
+                _target_: dimet.data.DatasetConfig,
+                name: "I am a synthetic data example"
+            },
+            method:{
+                _target_: dimet.method.DifferentialAnalysisConfig,
+                label: "differential_analysis",
+                name: "Pairwise computation of statistical differences",
+                draw_ellipses: null,
+                run_iris_demo: false
+            },
+            label: differential-analysis-example2
+        }'
+        ## The value is interpolated inside the quotes, exactly like the other overrides below.
+        '++analysis.method.qualityDistanceOverSpan=${qualityDistanceOverSpan}'
+        '++analysis.dataset.label='
+        '++analysis.timepoints=${timepoints}'
+        '++analysis.comparisons=${comparisons}'
+        '++analysis.method.statistical_test=${statistical_test}'
+        '++analysis.method.grouping=${groups}'
+        '++analysis.method.correction_method=${correction_method}'
+        '++analysis.method.impute_values=${impute_values}'
+        '++analysis.statistical_test=${statistical_test}'
+        '++analysis.dataset.subfolder='
+        '++analysis.dataset.conditions=${conds}'
+        ## A dataset file key is only passed when the corresponding input was supplied.
+        #if $metadata_path:
+            '++analysis.dataset.metadata=metadata'
+        #end if
+        ## At most one measurement file key is forwarded, selected by data_input_selector;
+        ## the final else branch covers the isotop_abs_file input.
+        #if str( $data_input.data_input_selector ) == "abundance":
+            #if $data_input.abundance_file:
+                '++analysis.dataset.abundances=abundance'
+            #end if
+        #elif str( $data_input.data_input_selector ) == "mean_enrichment":
+            #if $data_input.me_or_frac_contrib_file:
+                '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
+            #end if
+        #elif str( $data_input.data_input_selector ) == "isotop_prop":
+            #if $data_input.isotop_prop_file:
+                '++analysis.dataset.isotopologue_proportions=isotop_prop'
+            #end if
+        #else
+            #if $data_input.isotop_abs_file:
+                '++analysis.dataset.isotopologues=isotop_abs'
+            #end if
+        #end if
+    @REMOVE_CONFIG@
+    ]]></command>
+    <inputs>
+        <expand macro="input_parameters_diff_analysis"/>
+        <expand macro="conditions"/>
+        <expand macro="timepoint"/>
+        <expand macro="correction_method"/>
+        <param name="qualityDistanceOverSpan" type="float" min="-1.0" max="-0.1" value="-0.3" label="quality Distance Over Span" help="Default value is -0.3."/>
+    </inputs>
+
+    <!-- Files discovered in the "tables" directory are gathered into the "report" list as tabular datasets. -->
+    <outputs>
+        <collection name="report" type="list">
+            <discover_datasets pattern="__designation__" directory="tables" format="tabular"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="data_input_selector" value="abundance" />
+            <param name="abundance_file" ftype="tabular" value="rawAbundances.csv"/>
+            <param name="metadata_path" ftype="tabular" value="example2_metadata.csv"/>
+            <param name="correction_method" value="bonferroni"/>
+            <param name="stat_test" value="Tt"/>
+            <param name="qualityDistanceOverSpan" value="-0.3"/>
+            <param name="conditions" value="Control,L-Cycloserine"/>
+            <param name="timepoint" value="T0,T2h"/>
+            <output_collection name="report" type="list" count="4">
+                <element file="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" name="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
+                <element file="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" name="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/>
+                <element file="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" name="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
+                <element file="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" name="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).
+
+DIMet differential analysis compares groups to evaluate for statistical differences, in a **pairwise** mode.
+This pairwise mode accepts one or several defined comparison(s), that will run in a single execution.
+In this way, you do not need to re-upload your data several times,
+instead, you upload your data once and you compose a list of comparisons:
+
+- Tumoral,T0 vs Control,T0
+
+- Tumoral,T2 vs Control,T2
+
+- Tumoral,T24 vs Control,T24
+
+- ...
+
+then DIMet differential analysis will execute them -one by one- automatically.
+
+
+**Input data files**
+
+This tool requires (at most) 5 tab-delimited .csv files as inputs. There are two types of files:
+
+- The measures' (or quantifications') files, that can be of 4 types.
+
+- The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.
+
+For running DIMet @EXECUTABLE@ you need **at least one file** of measures:
+
+- The total **abundances** (of the metabolites) file
+
+- The mean **enrichment** or labelled fractional contributions
+
+- The **isotopologues** absolute values files (optional)
+
+- The **isotopologue proportions** file (optional)
+
+and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
+
+
+**Measures' files**
+
+The measures' files must be organized as matrices:
+
+- The first column must contain Metabolite IDs that are unique (not repeated) within the file.
+
+- The rest of the columns correspond to the samples
+
+- The rows correspond to the metabolites
+
+- The values must be tab separated, with the first row containing the sample/column labels.
+ +See the following examples of measures files: + + +Example - Metabolites **abundances**: + + =============== ================== ================== ================== ================== ================== ================== + ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** + =============== ================== ================== ================== ================== ================== ================== + 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956 + 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051 + Glc6P 2310 2142 2683 1683 012532068 1252172 + Gly3P 399298 991656565 525195 6365231 89451625 4952651963 + IsoCit 0 0 0 84915613 856236 954651610 + =============== ================== ================== ================== ================== ================== ================== + +Example - mean **enrichment** or labeled fractional contributions: + + =============== ================== ================== ================== ================== ================== ================== + ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** + =============== ================== ================== ================== ================== ================== ================== + 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9 + 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68 + Glc6P 0.06 0.66 2683 0.06 2068 2172 + Gly3P 0.06 0.06 0.06 1 5 3 + IsoCit 0.06 1 0.49 0.36 6 10 + =============== ================== ================== ================== ================== ================== ================== + +Example - **Isotopologues** + + =============== ================== ================== ================== ================== ================== ================== + ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** + =============== ================== 
================== ================== ================== ================== ================== + 2_3-PG_m+0 206171.4626 285834.0353 36413.27637 27367.17784 6171.4626 119999 + 2_3-PG_m+1 123 432 101 127 206171.4626 119999 + 2_3-PG_m+2 133780.182 161461.2364 182631.3947 132170.3807 358749.348 848754.36 + 2_3-PG_m+3 8358749.348 10271010.45 10505228.3 8376820.028 62163.30727 1088.8963 + 2-OHGLu_m+0 5550339.322 6072872.833 3855047.791 3216178.72 8358749.348 10271010.45 + 2-OHGLu_m+1 0.0 0.0 0.0 0.0 206171.4626 285834.0353 + =============== ================== ================== ================== ================== ================== ================== + + +Example - **Isotopologue proportions**: + + =============== ================== ================== ================== ================== ================== ================== + ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** + =============== ================== ================== ================== ================== ================== ================== + 2_3-PG_m+0 0.023701408 0.026667837 0.003395407 0.05955 0.034383527 0.12 + 2_3-PG_m+1 0.0 0.0 0.0 0.0 0.4 0.12 + 2_3-PG_m+2 0.015379329 0.01506 0.017029723 0.35483229 0.54131313 0.743 + 2_3-PG_m+3 0.960919263 0.958268099 0.97957487 0.581310816 0.017029723 0.017 + 2-OHGLu_m+0 0.972778716 0.960016157 0.238843937 0.234383527 0.9998888 0.015064063 + 2-OHGLu_m+1 0.0 0.0 0.0 0.0 0.0001112 0.960919263 + =============== ================== ================== ================== ================== ================== ================== + + + +**Metadata File Information** + +Provide a tab-separated file that has the names of the samples in the first column and one header row. 
+Column names must be exactly in this order: + + name_to_plot + condition + timepoint + timenum + compartment + original_name + + +Example **Metadata File**: + + + ==================== =============== ============= ============ ================ ================= + **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name** + -------------------- --------------- ------------- ------------ ---------------- ----------------- + Control_cell_T0-1 Control T0 0 cell MCF001089_TD01 + Control_cell_T0-2 Control T0 0 cell MCF001089_TD02 + Control_cell_T0-3 Control T0 0 cell MCF001089_TD03 + Tumoral_cell_T0-1 Tumoral T0 0 cell MCF001089_TD04 + Tumoral_cell_T0-2 Tumoral T0 0 cell MCF001089_TD05 + Tumoral_cell_T0-3 Tumoral T0 0 cell MCF001089_TD06 + Tumoral_cell_T24-1 Tumoral T24 24 cell MCF001089_TD07 + Tumoral_cell_T24-2 Tumoral T24 24 cell MCF001089_TD08 + Tumoral_cell_T24-3 Tumoral T24 24 cell MCF001090_TD01 + Control_med_T24-1 Control T24 24 med MCF001090_TD02 + Control_med_T24-2 Control T24 24 med MCF001090_TD03 + Tumoral_med_T24-1 Tumoral T24 24 med MCF001090_TD04 + Tumoral_med_T24-2 Tumoral T24 24 med MCF001090_TD05 + Control_med_T0-1 Control T0 0 med MCF001090_TD06 + Tumoral_med_T0-1 Tumoral T0 0 med MCF001090_TD07 + Tumoral_med_T0-2 Tumoral T0 0 med MCF001090_TD08 + ==================== =============== ============= ============ ================ ================= + + +The column **original_name** must have the names of the samples as given in your data. + +The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). To set names that +are meaningful is a better choice, as we will take them to display the results. + +The column **timenum** must contain only the numeric part of the timepoint, for example 2,0, 10, 100 (this means, without letters ("T", "t", "s", "h" etc) +nor any other symbol). Make sure these time numbers are in the same units (but do not write the units here!). 
+
+The column **compartment** is an abbreviation, coined by you, for the compartments. It will be used in the names of the result files: the longer the
+compartment names are, the longer the output file names! Please pick short and clear abbreviations to fill this column.
+
+
+**Running the analysis**
+
+You can specify how you want your analysis to be executed, with the parameters:
+
+- **conditions**: the conditions present in your data, listing your CONTROL condition first (this ordering is crucial for the comparisons to be made in a coherent direction).
+
+- **comparisons** : the pairs of [condition, timepoint] groups to compare
+
+- **datatypes** : the measure type(s) that you want to run
+
+- **statistical_test** : choose, by type of measure, the specific statistical test to be applied.
+
+  Kruskal-Wallis, Mann-Whitney, Wilcoxon’s signed rank test, Wilcoxon’s rank sum test,
+  t-test, and permutation test are currently offered (we use the trusted functions from the scipy library https://docs.scipy.org/doc/scipy/reference/stats.html).
+
+For the permutation test, the test statistic is the absolute difference of the geometric means of the two compared groups.
+
+- **qualityDistanceOverSpan**: a normalized distance between the intervals of values of the compared groups,
+  used as the cutoff for considering the groups minimally "separated" and therefore
+  suitable for statistical testing. A 'distance/span' == 1 is a perfect separation,
+  whereas if 'distance/span' < 0 there is no separation.
+  Use with caution if your intra-group values are highly dispersed. Default is -0.3 (not stringent).
+
+- **correction_method** : one of the methods for multiple testing correction available in the statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).
+
+Usage hints are shown next to each parameter to guide you.
+ + +**Available data for testing** + +You can test our tool with the data from our manuscript https://zenodo.org/record/8378887 (the pertinent +files for you are located in the subfolders inside the data folder). +You can also use the minimal data examples from https://zenodo.org/record/8380706 + + ]]> + </help> + <expand macro="citations" /> +</tool> \ No newline at end of file