diff dimet_differential_analysis.xml @ 0:c46d33411495 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit abca848510cb4ac8d09d95634147626ea578cdf0
author iuc
date Tue, 10 Oct 2023 11:52:44 +0000
parents
children e45e03a99d56
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dimet_differential_analysis.xml	Tue Oct 10 11:52:44 2023 +0000
@@ -0,0 +1,296 @@
+<tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
+    <description>
+        Differential analysis of tracer metabolomics data comparing two groups (by DIMet)
+    </description>
+    <macros> <!-- Local tokens: TOOL_LABEL and EXECUTABLE are substituted into the tool name and id attributes of the root element -->
+        <token name="@TOOL_LABEL@">differential analysis</token>
+        <token name="@EXECUTABLE@">differential_analysis</token>
+        <import>macros.xml</import> <!-- NOTE(review): presumably defines the remaining tokens and macros referenced in this file (not visible here); confirm -->
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+    @INIT_CONFIG@
+    @INIT_DIFF_ANALYSIS@
+    @INIT_STAT_TEST@
+    @INIT_GROUPS@
+    @INIT_COMPARISONS@
+    HYDRA_FULL_ERROR=1 python -m dimet ## every '++key=value' below is passed as one single-quoted Hydra override
+        -cp '$__new_file_path__/config'
+        '++hydra.run.dir=differential_analysis'
+        '++figure_path=figures'
+        '++table_path=tables'
+        '++analysis={
+            dataset:{
+                _target_: dimet.data.DatasetConfig,
+                name: "I am a synthetic data example"
+             },
+             method:{
+                _target_: dimet.method.DifferentialAnalysisConfig,
+                label: "differential_analysis",
+                name: "Pairwise computation of statistical differences",
+                draw_ellipses: null,
+                run_iris_demo: false
+              },
+              label: differential-analysis-example2
+         }'
+        '++analysis.method.qualityDistanceOverSpan=${qualityDistanceOverSpan}' ## value kept inside the quotes, like every other override
+        '++analysis.dataset.label='
+        '++analysis.timepoints=${timepoints}' ## NOTE(review): timepoints, comparisons, statistical_test, groups and conds are not declared in this file; presumably set by the INIT macros above, verify in macros.xml
+        '++analysis.comparisons=${comparisons}'
+        '++analysis.method.statistical_test=${statistical_test}'
+        '++analysis.method.grouping=${groups}'
+        '++analysis.method.correction_method=${correction_method}'
+        '++analysis.method.impute_values=${impute_values}'
+        '++analysis.statistical_test=${statistical_test}' ## NOTE(review): same value as analysis.method.statistical_test above; confirm both keys are needed
+        '++analysis.dataset.subfolder='
+        '++analysis.dataset.conditions=${conds}'
+        #if $metadata_path:
+            '++analysis.dataset.metadata=metadata'
+        #end if
+        #if str( $data_input.data_input_selector ) == "abundance":
+            #if $data_input.abundance_file:
+                '++analysis.dataset.abundances=abundance'
+            #end if
+        #elif str( $data_input.data_input_selector ) == "mean_enrichment":
+            #if $data_input.me_or_frac_contrib_file:
+                '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
+            #end if
+        #elif str( $data_input.data_input_selector ) == "isotop_prop":
+            #if $data_input.isotop_prop_file:
+                '++analysis.dataset.isotopologue_proportions=isotop_prop'
+            #end if
+        #else
+            #if $data_input.isotop_abs_file:
+                '++analysis.dataset.isotopologues=isotop_abs'
+            #end if
+        #end if
+    @REMOVE_CONFIG@
+    ]]></command>
+    <inputs> <!-- four inputs are expanded from macros; qualityDistanceOverSpan is the only parameter declared inline -->
+        <expand macro="input_parameters_diff_analysis"/>
+        <expand macro="conditions"/>
+        <expand macro="timepoint"/>
+        <expand macro="correction_method"/>
+        <param name="qualityDistanceOverSpan" type="float" value="-0.3" min="-1.0" max="-0.1" label="quality Distance Over Span" help="Default value is -0.3."/>
+    </inputs>
+
+    <outputs> <!-- files discovered in the "tables" directory become tabular elements of the "report" list collection -->
+        <collection type="list" name="report">
+            <discover_datasets directory="tables" pattern="__designation__" format="tabular"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test> <!-- abundance input plus metadata; expects the four result tables listed in the report collection -->
+            <param name="data_input_selector" value="abundance"/>
+            <param name="abundance_file" value="rawAbundances.csv" ftype="tabular"/>
+            <param name="metadata_path" value="example2_metadata.csv" ftype="tabular"/>
+            <param name="correction_method" value="bonferroni"/>
+            <param name="stat_test" value="Tt"/>
+            <param name="qualityDistanceOverSpan" value="-0.3"/>
+            <param name="conditions" value="Control,L-Cycloserine"/>
+            <param name="timepoint" value="T0,T2h"/>
+            <output_collection name="report" type="list" count="4">
+                <element name="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" file="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
+                <element name="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" file="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/>
+                <element name="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" file="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
+                <element name="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" file="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).
+
+DIMet differential analysis compares groups to evaluate statistical differences, in a **pairwise** mode.
+This pairwise mode accepts one or several defined comparison(s), which will all run in a single execution.
+In this way, you do not need to re-upload your data several times;
+instead, you upload your data once and compose a list of comparisons:
+
+-  Tumoral,T0  vs  Control,T0
+
+-  Tumoral,T2  vs  Control,T2
+
+-  Tumoral,T24  vs  Control,T24
+
+-  ...
+
+then DIMet differential analysis will execute them -one by one- automatically.
+
+
+**Input data files**
+
+This tool requires at most 5 tab-delimited .csv files as inputs. There are two types of files:
+
+- The measures' (or quantifications') files, that can be of 4 types.
+
+- The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.
+
+For running DIMet @EXECUTABLE@ you need **at least one file** of measures:
+
+- The total **abundances** (of the metabolites) file
+
+- The mean **enrichment** or labelled fractional contributions
+
+- The **isotopologues** absolute values files (optional)
+
+- The **isotopologue proportions** file (optional)
+
+and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
+
+
+**Measures' files**
+
+The measures' files must be organized as matrices:
+
+- The first column must contain Metabolite IDs that are unique (not repeated) within the file.
+
+- The rest of the columns correspond to the samples
+
+- The rows correspond to the metabolites
+
+- The values must be tab separated, with the first row containing the sample/column labels.
+
+See the following examples of measures files:
+
+
+Example - Metabolites **abundances**:
+
+    =============== ================== ================== ================== ================== ================== ==================
+    ID              **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
+    =============== ================== ================== ================== ================== ================== ==================
+    2_3-PG          8698823.9926       10718737.7217      10724373.9         8536484.5          22060650           28898956
+    2-OHGLu         36924336           424336             92060650           45165              84951950           965165051
+    Glc6P           2310               2142               2683               1683               012532068          1252172
+    Gly3P           399298             991656565          525195             6365231            89451625           4952651963
+    IsoCit          0                  0                  0                  84915613           856236             954651610
+    =============== ================== ================== ================== ================== ================== ==================
+
+Example - mean **enrichment** or labeled fractional contributions:
+
+    =============== ================== ================== ================== ================== ================== ==================
+    ID              **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
+    =============== ================== ================== ================== ================== ================== ==================
+    2_3-PG          0.9711             0.968              0.9909             0.991              0.40               0.9
+    2-OHGLu         0.01719            0.0246             0.554              0.555              0.73               0.68
+    Glc6P           0.06               0.66               2683               0.06               2068               2172
+    Gly3P           0.06               0.06               0.06               1                  5                  3
+    IsoCit          0.06               1                  0.49               0.36               6                  10
+    =============== ================== ================== ================== ================== ================== ==================
+
+Example - **Isotopologues**
+
+    =============== ================== ================== ================== ================== ================== ==================
+    ID              **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
+    =============== ================== ================== ================== ================== ================== ==================
+    2_3-PG_m+0      206171.4626        285834.0353        36413.27637        27367.17784        6171.4626          119999
+    2_3-PG_m+1      123                432                101                127                206171.4626        119999
+    2_3-PG_m+2      133780.182         161461.2364        182631.3947        132170.3807        358749.348         848754.36
+    2_3-PG_m+3      8358749.348        10271010.45        10505228.3         8376820.028        62163.30727        1088.8963
+    2-OHGLu_m+0     5550339.322        6072872.833        3855047.791        3216178.72         8358749.348        10271010.45
+    2-OHGLu_m+1     0.0                0.0                0.0                0.0                206171.4626        285834.0353
+    =============== ================== ================== ================== ================== ================== ==================
+
+
+Example - **Isotopologue proportions**:
+
+    =============== ================== ================== ================== ================== ================== ==================
+    ID              **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
+    =============== ================== ================== ================== ================== ================== ==================
+    2_3-PG_m+0      0.023701408        0.026667837        0.003395407        0.05955            0.034383527        0.12
+    2_3-PG_m+1      0.0                0.0                0.0                0.0                0.4                0.12
+    2_3-PG_m+2      0.015379329        0.01506            0.017029723        0.35483229         0.54131313         0.743
+    2_3-PG_m+3      0.960919263        0.958268099        0.97957487         0.581310816        0.017029723        0.017
+    2-OHGLu_m+0     0.972778716        0.960016157        0.238843937        0.234383527        0.9998888          0.015064063
+    2-OHGLu_m+1     0.0                0.0                0.0                0.0                0.0001112          0.960919263
+    =============== ================== ================== ================== ================== ================== ==================
+
+
+
+**Metadata File Information**
+
+Provide a tab-separated file that has the names of the samples in the first column and one header row.
+Column names must be exactly in this order:
+
+   1. name_to_plot
+   2. condition
+   3. timepoint
+   4. timenum
+   5. compartment
+   6. original_name
+
+
+Example **Metadata File**:
+
+
+    ==================== =============== ============= ============ ================ =================
+    **name_to_plot**     **condition**   **timepoint** **timenum**  **compartment**  **original_name**
+    ==================== =============== ============= ============ ================ =================
+    Control_cell_T0-1    Control         T0            0            cell             MCF001089_TD01
+    Control_cell_T0-2    Control         T0            0            cell             MCF001089_TD02
+    Control_cell_T0-3    Control         T0            0            cell             MCF001089_TD03
+    Tumoral_cell_T0-1    Tumoral         T0            0            cell             MCF001089_TD04
+    Tumoral_cell_T0-2    Tumoral         T0            0            cell             MCF001089_TD05
+    Tumoral_cell_T0-3    Tumoral         T0            0            cell             MCF001089_TD06
+    Tumoral_cell_T24-1   Tumoral         T24           24           cell             MCF001089_TD07
+    Tumoral_cell_T24-2   Tumoral         T24           24           cell             MCF001089_TD08
+    Tumoral_cell_T24-3   Tumoral         T24           24           cell             MCF001090_TD01
+    Control_med_T24-1    Control         T24           24           med              MCF001090_TD02
+    Control_med_T24-2    Control         T24           24           med              MCF001090_TD03
+    Tumoral_med_T24-1    Tumoral         T24           24           med              MCF001090_TD04
+    Tumoral_med_T24-2    Tumoral         T24           24           med              MCF001090_TD05
+    Control_med_T0-1     Control         T0            0            med              MCF001090_TD06
+    Tumoral_med_T0-1     Tumoral         T0            0            med              MCF001090_TD07
+    Tumoral_med_T0-2     Tumoral         T0            0            med              MCF001090_TD08
+    ==================== =============== ============= ============ ================ =================
+
+
+The column **original_name** must have the names of the samples as given in your data.
+
+The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). To set names that
+are meaningful is a better choice, as we will take them to display the results.
+
+The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (that is, without letters such as "T", "t", "s", "h",
+or any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).
+
+The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the
+compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.
+
+
+**Running the analysis**
+
+You can specify how you want your analysis to be executed, with the parameters:
+
+- **conditions**: the conditions present in your data, specifying in first place your CONTROL condition (this ordering is crucial to make the comparisons in a coherent sense).
+
+- **comparisons** : the pairs of [condition, timepoint] groups to compare
+
+- **datatypes** : the measures type(s) that you want to run
+
+- **statistical_test** : choose, by type of measure, the specific statistical test to be applied.
+
+  Kruskal-Wallis, Mann-Whitney, Wilcoxon’s signed rank test, Wilcoxon’s rank sum test,
+  t-test, and permutation test are currently offered (we use the trusted functions from scipy library https://docs.scipy.org/doc/scipy/reference/stats.html).
+
+For the permutation test, we have established as test statistic, the absolute difference of geometric means of the two compared groups.
+
+- **qualityDistanceOverSpan**: a normalized distance between the intervals of values of the compared groups, that is the cutoff for
+  considering a minimal acceptable "separation", and therefore, to be suitable for statistical testing. A 'distance/span' == 1 is a perfect separation,
+  whereas if 'distance/span' < 0 there is no separation.
+  Use with caution in case of important dispersion of your intra-group values. Default is -0.3 (not stringent).
+
+
+- **correction_method** : one of the methods for multiple testing correction available in statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).
+
+There exist hints on use that will guide you, next to the parameters.
+
+
+**Available data for testing**
+
+You can test our tool with the data from our manuscript https://zenodo.org/record/8378887 (the pertinent
+files for you are located in the subfolders inside the data folder).
+You can also use the minimal data examples from https://zenodo.org/record/8380706
+
+ ]]>
+    </help>
+    <expand macro="citations" />
+</tool>
\ No newline at end of file