Mercurial > repos > iuc > dimet_pca_analysis

diff dimet_pca_analysis.xml @ 0:04d213632103 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit abca848510cb4ac8d09d95634147626ea578cdf0
author: iuc
date: Tue, 10 Oct 2023 11:56:02 +0000
children: c0103ea608fc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dimet_pca_analysis.xml	Tue Oct 10 11:56:02 2023 +0000
@@ -0,0 +1,215 @@
+<tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
+    <description>
+        Principal Component Analysis for tracer metabolomics data, producing tables (by DIMet)
+    </description>
+    <macros>
+        <token name="@TOOL_LABEL@">pca analysis</token>
+        <token name="@EXECUTABLE@">pca_analysis</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+    @INIT_CONFIG@
+    @INIT_PCA@
+    @INIT_IMPUTE_VALUES@
+    @INIT_CONDITIONS@
+    HYDRA_FULL_ERROR=1 python -m dimet
+        -cp '$__new_file_path__/config'
+        '++hydra.run.dir=pca-analysis-tables'
+        '++figure_path=figures'
+        '++table_path=tables'
+        '++analysis={
+            dataset:{
+                _target_:dimet.data.DatasetConfig,
+                name: "Galaxy DIMet run"
+            },
+            method:{
+                _target_: dimet.method.PcaAnalysisConfig,
+                label: pca-analysis-tables,
+                name: "Generate Principal Component Analysis tables",
+                pca_split_further:['timepoint'],
+                draw_ellipses: null,
+                run_iris_demo: false,
+                impute_values:${impute_values}
+            },
+            label: pca-table
+        }'
+        '++analysis.dataset.subfolder='
+        '++analysis.dataset.label='
+        '++analysis.dataset.conditions=${conds}'
+        #if $metadata_path:
+            '++analysis.dataset.metadata=metadata'
+        #end if
+        #if $abundance_file:
+            '++analysis.dataset.abundances=abundance'
+        #end if
+        #if $me_or_frac_contrib_file:
+            '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
+        #end if
+    @REMOVE_CONFIG@
+    ]]></command>
+    <inputs>
+        <expand macro="input_parameters_pca"/>
+        <expand macro="conditions"/>
+    </inputs>
+
+    <outputs>
+        <collection name="report" type="list">
+            <discover_datasets pattern="__designation__" directory="tables" format="tabular"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="abundance_file" ftype="tabular" value="rawAbundances.csv" />
+            <param name="metadata_path" ftype="tabular" value="example2_metadata.csv"/>
+            <param name="conditions" value='Control,L-Cycloserine'/>
+            <output_collection name="report" type="list" count="12">
+                <element file="abundances--cell_pc.csv"  name="abundances--cell_pc.csv" ftype="tabular"/>
+                <element file="abundances--cell_var.csv" name="abundances--cell_var.csv" ftype="tabular"/>
+                <element file="abundances--med_pc.csv" name="abundances--med_pc.csv" ftype="tabular"/>
+                <element file="abundances--med_var.csv" name="abundances--med_var.csv" ftype="tabular"/>
+                <element file="abundances--T0--cell_pc.csv"  name="abundances--T0--cell_pc.csv" ftype="tabular"/>
+                <element file="abundances--T0--cell_var.csv" name="abundances--T0--cell_var.csv" ftype="tabular"/>
+                <element file="abundances--T0--med_pc.csv" name="abundances--T0--med_pc.csv" ftype="tabular"/>
+                <element file="abundances--T0--med_var.csv" name="abundances--T0--med_var.csv" ftype="tabular"/>
+                <element file="abundances--T2h--cell_pc.csv"  name="abundances--T2h--cell_pc.csv" ftype="tabular"/>
+                <element file="abundances--T2h--cell_var.csv" name="abundances--T2h--cell_var.csv" ftype="tabular"/>
+                <element file="abundances--T2h--med_pc.csv" name="abundances--T2h--med_pc.csv" ftype="tabular"/>
+                <element file="abundances--T2h--med_var.csv" name="abundances--T2h--med_var.csv" ftype="tabular"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).
+
+This tool performs the Principal Components Analysis (PCA) on your data,
+generating the tab-delimited .csv files with the results of the PCA, it is, all the principal components or "dimensions" eigenvalues, and the percentage of explained variances across all the principal components detected in your data.
+
+For automatic plotting of a PCA analysis use our tool **DIMet pca plot**
+
+    **Input data files**
+
+This tool requires (at max.) 3 tab-delimited .csv files as inputs. There are two types of files:
+
+- The measures' (or quantifications') files, that can be of 4 types.
+
+- The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.
+
+For running DIMet @EXECUTABLE@ you need **at least one file** of measures:
+
+- The total **abundances** (of the metabolites) file
+
+- The mean **enrichment** or labelled fractional contributions
+
+
+and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
+
+
+**Measures' files**
+
+The measure's files must be organized as matrices:
+
+- The first column must contain Metabolite IDs that are unique (not repeated) within the file.
+
+- The rest of the columns correspond to the samples
+
+- The rows correspond to the metabolites
+
+- The values must be tab separated, with the first row containing the sample/column labels.
+
+See the following examples of measures files:
+
+
+Example - Metabolites **abundances**:
+
+    =============== ================== ================== ================== ================== ================== ==================
+    ID              **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
+    =============== ================== ================== ================== ================== ================== ==================
+    2_3-PG          8698823.9926       10718737.7217      10724373.9         8536484.5          22060650           28898956
+    2-OHGLu         36924336           424336             92060650           45165              84951950           965165051
+    Glc6P           2310               2142               2683               1683               012532068          1252172
+    Gly3P           399298             991656565          525195             6365231            89451625           4952651963
+    IsoCit          0                  0                  0                  84915613           856236             954651610
+    =============== ================== ================== ================== ================== ================== ==================
+
+Example - mean **enrichment** or labeled fractional contributions:
+
+    =============== ================== ================== ================== ================== ================== ==================
+    ID              **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
+    =============== ================== ================== ================== ================== ================== ==================
+    2_3-PG          0.9711             0.968              0.9909             0.991              0.40               0.9
+    2-OHGLu         0.01719            0.0246             0.554              0.555              0.73               0.68
+    Glc6P           0.06               0.66               2683               0.06               2068               2172
+    Gly3P           0.06               0.06               0.06               1                  5                  3
+    IsoCit          0.06               1                  0.49               0.36               6                  10
+    =============== ================== ================== ================== ================== ================== ==================
+
+
+**Metadata File Information**
+
+Provide a tab-separated file that has the names of the samples in the first column and one header row.
+Column names must be exactly in this order:
+
+   name_to_plot
+   condition
+   timepoint
+   timenum
+   compartment
+   original_name
+
+
+Example **Metadata File**:
+
+
+    ==================== =============== ============= ============ ================ =================
+    **name_to_plot**     **condition**   **timepoint** **timenum**  **compartment**   **original_name**
+    -------------------- --------------- ------------- ------------ ---------------- -----------------
+    Control_cell_T0-1    Control         T0            0            cell             MCF001089_TD01
+    Control_cell_T0-2    Control         T0            0            cell             MCF001089_TD02
+    Control_cell_T0-3    Control         T0            0            cell             MCF001089_TD03
+    Tumoral_cell_T0-1    Tumoral         T0            0            cell             MCF001089_TD04
+    Tumoral_cell_T0-2    Tumoral         T0            0            cell             MCF001089_TD05
+    Tumoral_cell_T0-3    Tumoral         T0            0            cell             MCF001089_TD06
+    Tumoral_cell_T24-1   Tumoral         T24           24           cell             MCF001089_TD07
+    Tumoral_cell_T24-2   Tumoral         T24           24           cell             MCF001089_TD08
+    Tumoral_cell_T24-3   Tumoral         T24           24           cell             MCF001090_TD01
+    Control_med_T24-1    Control         T24           24           med              MCF001090_TD02
+    Control_med_T24-2    Control         T24           24           med              MCF001090_TD03
+    Tumoral_med_T24-1    Tumoral         T24           24           med              MCF001090_TD04
+    Tumoral_med_T24-2    Tumoral         T24           24           med              MCF001090_TD05
+    Control_med_T0-1     Control         T0            0            med              MCF001090_TD06
+    Tumoral_med_T0-1     Tumoral         T0            0            med              MCF001090_TD07
+    Tumoral_med_T0-2     Tumoral         T0            0            med              MCF001090_TD08
+    ==================== =============== ============= ============ ================ =================
+
+
+The column **original_name** must have the names of the samples as given in your data.
+
+The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). To set names that
+are meaningful is a better choice, as we will take them to display the results.
+
+The column **timenum** must contain only the numeric part of the timepoint, for example 2,0, 10, 100 (this means, without letters ("T", "t", "s", "h" etc)
+nor any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).
+
+The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the
+compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.
+
+
+**Running the analysis**
+
+You can precise how you want your analysis to be executed, there exist hints on use that will guide you, next to the parameters.
+
+Our tool automatically analyzes the integrality of your data (one global PCA analysis), and also splits your data by timepoint to generate PCA results by timepoint (which is convenient to explore the "grouping" of conditions), but if you only have one condition you can discard them.
+
+The output consists of two .csv files for each performed PCA analysis (one file with the Principal Components (PC), one file with the variances).
+
+**Available data for testing**
+
+You can test our tool with the data from our manuscript https://zenodo.org/record/8378887 (the pertinent
+files for you are located in the subfolders inside the data folder).
+You can also use the minimal data examples from https://zenodo.org/record/8380706
+
+ ]]>
+    </help>
+    <expand macro="citations" />
+</tool>
\ No newline at end of file
author	iuc
date	Tue, 10 Oct 2023 11:56:02 +0000
parents
children	c0103ea608fc