Mercurial > repos > iuc > dimet_differential_analysis
view dimet_differential_analysis.xml @ 2:e45e03a99d56 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit 30fe10acdf65c6917856a0eae21dc91abd2f609f
author | iuc |
---|---|
date | Thu, 15 Feb 2024 12:53:06 +0000 |
parents | c46d33411495 |
children | 8579d74e740b |
line wrap: on
line source
<tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
    <!-- Galaxy wrapper around the DIMet differential analysis entry point.
         @TOOL_LABEL@ and @EXECUTABLE@ are defined in the macros element below. The
         remaining @...@ tokens and every expanded macro are not defined in this file;
         presumably they come from the imported macros.xml (TODO confirm). -->
    <description>
        Differential analysis of tracer metabolomics data comparing two groups (by DIMet)
    </description>
    <macros>
        <token name="@TOOL_LABEL@">differential analysis</token>
        <token name="@EXECUTABLE@">differential_analysis</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!-- Runs "python -m dimet" with Hydra-style "++key=value" overrides, one
         single-quoted shell word per override.
         NOTE(review): ${timepoints}, ${comparisons}, ${statistical_test}, ${groups},
         ${conditions}, ${impute_values} and $metadata_path are not declared as
         parameters in this file; presumably the @INIT_*@ tokens and the expanded
         input macros provide them. Verify against macros.xml.
         The dataset overrides pass fixed names (metadata, abundance,
         me_or_frac_contrib, isotop_prop, isotop_abs) rather than Galaxy dataset
         paths; presumably @INIT_CONFIG@ stages the inputs under those names
         (TODO confirm). -->
    <command detect_errors="exit_code"><![CDATA[
        @INIT_CONFIG@
        @INIT_DIFF_ANALYSIS@
        @INIT_STAT_TEST@
        @INIT_GROUPS@
        @INIT_DIFF_ANALYSIS_COMPARISONS@
        HYDRA_FULL_ERROR=1 python -m dimet
            -cp '$__new_file_path__/config'
            '++hydra.run.dir=differential_analysis'
            '++figure_path=figures'
            '++table_path=tables'
            '++analysis={ dataset:{ _target_: dimet.data.DatasetConfig, name: "I am a synthetic data example" }, method:{ _target_: dimet.method.DifferentialAnalysisConfig, label: "differential_analysis", name: "Pairwise computation of statistical differences", draw_ellipses: null, run_iris_demo: false }, label: differential-analysis-example2 }'
            '++analysis.method.qualityDistanceOverSpan=${qualityDistanceOverSpan}'
            '++analysis.dataset.label='
            '++analysis.timepoints=${timepoints}'
            '++analysis.comparisons=${comparisons}'
            '++analysis.method.statistical_test=${statistical_test}'
            '++analysis.method.grouping=${groups}'
            '++analysis.method.correction_method=${correction_method}'
            '++analysis.method.impute_values=${impute_values}'
            '++analysis.statistical_test=${statistical_test}'
            '++analysis.dataset.subfolder='
            '++analysis.dataset.conditions=${conditions}'
        #if $metadata_path:
            '++analysis.dataset.metadata=metadata'
        #end if
        #if str( $data_input.data_input_selector ) == "abundance":
            #if $data_input.abundance_file:
                '++analysis.dataset.abundances=abundance'
            #end if
        #elif str( $data_input.data_input_selector ) == "mean_enrichment":
            #if $data_input.me_or_frac_contrib_file:
                '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
            #end if
        #elif str( $data_input.data_input_selector ) == "isotop_prop":
            #if $data_input.isotop_prop_file:
                '++analysis.dataset.isotopologue_proportions=isotop_prop'
            #end if
        #else
            #if $data_input.isotop_abs_file:
                '++analysis.dataset.isotopologues=isotop_abs'
            #end if
        #end if
        @REMOVE_CONFIG@
    ]]></command>
    <inputs>
        <expand macro="input_parameters_diff_analysis"/>
        <expand macro="factor_list"/>
        <expand macro="timepoint"/>
        <expand macro="correction_method"/>
        <!-- The only parameter declared directly in this file: a float restricted to [-1.0, -0.1]. -->
        <param name="qualityDistanceOverSpan" type="float" min="-1.0" max="-0.1" value="-0.3" label="quality Distance Over Span" help="Default value is -0.3."/>
    </inputs>
    <outputs>
        <!-- Each file discovered in the "tables" directory becomes one tabular element of the "report" list collection. -->
        <collection name="report" type="list">
            <discover_datasets pattern="__designation__" directory="tables" format="tabular"/>
        </collection>
    </outputs>
    <tests>
        <!-- Abundance input, two conditions and two timepoints; four result tables are expected. -->
        <test>
            <param name="data_input_selector" value="abundance" />
            <param name="abundance_file" ftype="tabular" value="rawAbundances.csv"/>
            <param name="metadata_path" ftype="tabular" value="example2_metadata.csv"/>
            <param name="correction_method" value="bonferroni"/>
            <param name="statistical_test_type" value="parametric"/>
            <param name="stat_test" value="Tt"/>
            <param name="qualityDistanceOverSpan" value="-0.3"/>
            <repeat name="factor_list">
                <param name="condition" value="Control"/>
            </repeat>
            <repeat name="factor_list">
                <param name="condition" value="L-Cycloserine"/>
            </repeat>
            <param name="timepoint" value="T0,T2h"/>
            <output_collection name="report" type="list" count="4">
                <element file="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" name="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
                <element file="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" name="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/>
                <element file="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" name="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
                <element file="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" name="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/>
            </output_collection>
</test> </tests> <help><![CDATA[ This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/). DIMet differential analysis compares groups to evaluate for statistical differences, in a **pairwise** mode. This pairwise mode accepts one or several defined comparison(s), that will run in a single execution. In this way, you do not need to re-upload your data several times; instead, you upload your data once and compose a list of comparisons: - Tumoral,T0 vs Control,T0 - Tumoral,T2 vs Control,T2 - Tumoral,T24 vs Control,T24 - ... then DIMet differential analysis will execute them -one by one- automatically. **Input data files** This tool requires (at most) 5 tab-delimited .csv files as inputs. There are two types of files: - The measures' (or quantifications') files, that can be of 4 types. - The metadata, a unique file with the description of the samples in your measures' files. This is compulsory. For running DIMet @EXECUTABLE@ you need **at least one file** of measures: - The total **abundances** (of the metabolites) file - The mean **enrichment** or labelled fractional contributions - The **isotopologues** absolute values files (optional) - The **isotopologue proportions** file (optional) and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**. **Measures' files** The measures' files must be organized as matrices: - The first column must contain Metabolite IDs that are unique (not repeated) within the file. - The rest of the columns correspond to the samples - The rows correspond to the metabolites - The values must be tab separated, with the first row containing the sample/column labels. 
See the following examples of measures files: Example - Metabolites **abundances**: =============== ================== ================== ================== ================== ================== ================== ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** =============== ================== ================== ================== ================== ================== ================== 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051 Glc6P 2310 2142 2683 1683 012532068 1252172 Gly3P 399298 991656565 525195 6365231 89451625 4952651963 IsoCit 0 0 0 84915613 856236 954651610 =============== ================== ================== ================== ================== ================== ================== Example - mean **enrichment** or labeled fractional contributions: =============== ================== ================== ================== ================== ================== ================== ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** =============== ================== ================== ================== ================== ================== ================== 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68 Glc6P 0.06 0.66 2683 0.06 2068 2172 Gly3P 0.06 0.06 0.06 1 5 3 IsoCit 0.06 1 0.49 0.36 6 10 =============== ================== ================== ================== ================== ================== ================== Example - **Isotopologues** =============== ================== ================== ================== ================== ================== ================== ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** =============== ================== ================== ================== ================== 
================== ================== 2_3-PG_m+0 206171.4626 285834.0353 36413.27637 27367.17784 6171.4626 119999 2_3-PG_m+1 123 432 101 127 206171.4626 119999 2_3-PG_m+2 133780.182 161461.2364 182631.3947 132170.3807 358749.348 848754.36 2_3-PG_m+3 8358749.348 10271010.45 10505228.3 8376820.028 62163.30727 1088.8963 2-OHGLu_m+0 5550339.322 6072872.833 3855047.791 3216178.72 8358749.348 10271010.45 2-OHGLu_m+1 0.0 0.0 0.0 0.0 206171.4626 285834.0353 =============== ================== ================== ================== ================== ================== ================== Example - **Isotopologue proportions**: =============== ================== ================== ================== ================== ================== ================== ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** =============== ================== ================== ================== ================== ================== ================== 2_3-PG_m+0 0.023701408 0.026667837 0.003395407 0.05955 0.034383527 0.12 2_3-PG_m+1 0.0 0.0 0.0 0.0 0.4 0.12 2_3-PG_m+2 0.015379329 0.01506 0.017029723 0.35483229 0.54131313 0.743 2_3-PG_m+3 0.960919263 0.958268099 0.97957487 0.581310816 0.017029723 0.017 2-OHGLu_m+0 0.972778716 0.960016157 0.238843937 0.234383527 0.9998888 0.015064063 2-OHGLu_m+1 0.0 0.0 0.0 0.0 0.0001112 0.960919263 =============== ================== ================== ================== ================== ================== ================== **Metadata File Information** Provide a tab-separated file that has the names of the samples in the first column and one header row. 
Column names must be exactly in this order: name_to_plot condition timepoint timenum compartment original_name Example **Metadata File**: ==================== =============== ============= ============ ================ ================= **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name** -------------------- --------------- ------------- ------------ ---------------- ----------------- Control_cell_T0-1 Control T0 0 cell MCF001089_TD01 Control_cell_T0-2 Control T0 0 cell MCF001089_TD02 Control_cell_T0-3 Control T0 0 cell MCF001089_TD03 Tumoral_cell_T0-1 Tumoral T0 0 cell MCF001089_TD04 Tumoral_cell_T0-2 Tumoral T0 0 cell MCF001089_TD05 Tumoral_cell_T0-3 Tumoral T0 0 cell MCF001089_TD06 Tumoral_cell_T24-1 Tumoral T24 24 cell MCF001089_TD07 Tumoral_cell_T24-2 Tumoral T24 24 cell MCF001089_TD08 Tumoral_cell_T24-3 Tumoral T24 24 cell MCF001090_TD01 Control_med_T24-1 Control T24 24 med MCF001090_TD02 Control_med_T24-2 Control T24 24 med MCF001090_TD03 Tumoral_med_T24-1 Tumoral T24 24 med MCF001090_TD04 Tumoral_med_T24-2 Tumoral T24 24 med MCF001090_TD05 Control_med_T0-1 Control T0 0 med MCF001090_TD06 Tumoral_med_T0-1 Tumoral T0 0 med MCF001090_TD07 Tumoral_med_T0-2 Tumoral T0 0 med MCF001090_TD08 ==================== =============== ============= ============ ================ ================= The column **original_name** must have the names of the samples as given in your data. The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). Setting meaningful names is the better choice, as they are used to display the results. The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (this means, without letters ("T", "t", "s", "h" etc) nor any other symbol). Make sure these time numbers are in the same units (but do not write the units here!). 
The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column. **Running the analysis** You can specify how you want your analysis to be executed, with the parameters: - **conditions**: the conditions present in your data, listing your CONTROL condition first (this ordering is crucial to make the comparisons in a coherent sense). - **comparisons** : the pairs of [condition, timepoint] groups to compare - **datatypes** : the measures type(s) that you want to run - **statistical_test** : choose, by type of measure, the specific statistical test to be applied. Kruskal-Wallis, Mann-Whitney, Wilcoxon’s signed rank test, Wilcoxon’s rank sum test, t-test, and permutation test are currently offered (we use the trusted functions from scipy library https://docs.scipy.org/doc/scipy/reference/stats.html). For the permutation test, the test statistic is the absolute difference of geometric means of the two compared groups. - **qualityDistanceOverSpan**: a normalized distance between the intervals of values of the compared groups, that is the cutoff for considering a minimal acceptable "separation", and therefore, to be suitable for statistical testing. A 'distance/span' == 1 is a perfect separation, whereas if 'distance/span' < 0 there is no separation. Use with caution when your intra-group values are widely dispersed. Default is -0.3 (not stringent) - **correction_method** : one of the methods for multiple testing correction available in statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html). Usage hints are shown next to each parameter. 
For more information about the implemented statistical tests, please visit: https://github.com/cbib/DIMet/wiki/2-Statistical-tests The output files are explained in https://github.com/cbib/DIMet/wiki/3-Output **Available data for testing** You can test our tool with the data from our manuscript https://zenodo.org/record/10579862 (the pertinent files for you are located in the subfolders inside the data folder). You can also use the minimal data examples from https://zenodo.org/record/10579891 ]]> </help> <expand macro="citations" /> </tool>