diff multigsea.xml @ 0:28e29a3d0eda draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 5c1b8a2b105a80e236f88e71a743147d79925ac4
author iuc
date Wed, 07 Jun 2023 19:48:50 +0000
parents
children e48b10ce08b8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multigsea.xml	Wed Jun 07 19:48:50 2023 +0000
@@ -0,0 +1,295 @@
+<tool id="multigsea" name="multiGSEA" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE@">
+    <description>GSEA-based pathway enrichment analysis for multi-omics data</description>
+    <!-- Import macros.xml; the macros expanded below are not defined inline in this file. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- Expand the shared xrefs and requirements macros. -->
+    <expand macro="xrefs" />
+    <expand macro="requirements" />
+    <!-- Treat the job as failed (level "fatal") when any of these patterns
+         shows up on stdout or stderr (source "both"). -->
+    <stdio>
+        <regex source="both" level="fatal" match="Execution halted" description="Execution halted." />
+        <regex source="both" level="fatal" match="Error in" description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex source="both" level="fatal" match="Fatal error" description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <!-- Builds the Rscript call for multiGSEA.R. Each omics layer contributes
+         its input table and ID format only when its selector equals "true";
+         organism, databases, p-value combination method and p-value adjustment
+         method are always passed. Every dataset is referenced through its
+         enclosing conditional so that all variables are fully qualified. -->
+    <command><![CDATA[
+
+        Rscript '${__tool_directory__}/multiGSEA.R'
+            #if $transcriptomics_data.selector == "true"
+                --transcriptomics '${transcriptomics_data.transcriptomics}'
+                --transcriptome_ids $transcriptomics_data.transcriptome_ids
+            #end if
+            #if $proteomics_data.selector == "true"
+                --proteomics '${proteomics_data.proteomics}'
+                --proteome_ids $proteomics_data.proteome_ids
+            #end if
+            #if $metabolomics_data.selector == "true"
+                --metabolomics '${metabolomics_data.metabolomics}'
+                --metabolome_ids $metabolomics_data.metabolome_ids
+            #end if
+            --organism $organism
+            --databases $databases
+            --combine_pvalues $combine_pvalues
+            --padj_method $padj_method
+
+    ]]></command>
+    <inputs>
+        <!-- Transcriptomics layer: the input table and its gene ID format are
+             only requested when the selector is set to Enabled ("true"). -->
+        <conditional name="transcriptomics_data">
+            <param name="selector" type="select" label="Select transcriptomics data">
+                <option value="true">Enabled</option>
+                <option value="false">Disabled</option>
+            </param>
+            <when value="true">
+                <param name="transcriptomics" type="data" format="tabular" label="Transcriptomics data"
+                    help="Tabular file with the columns Symbol (feature ID), logFC (log2 fold change) and pValue." />
+                <expand macro="macro_IDs" name="transcriptome_ids" label="Gene ID format in transcriptomics data"/>
+            </when>
+            <when value="false"/>
+        </conditional>
+        <!-- Proteomics layer: the input table and its ID format are only
+             requested when the selector is set to Enabled ("true"). -->
+        <conditional name="proteomics_data">
+            <param name="selector" type="select" label="Select proteomics data">
+                <option value="true">Enabled</option>
+                <option value="false">Disabled</option>
+            </param>
+            <when value="true">
+                <param name="proteomics" type="data" format="tabular" label="Proteomics data"
+                    help="Tabular file with the columns Symbol (feature ID), logFC (log2 fold change) and pValue." />
+                <expand macro="macro_IDs" name="proteome_ids" label="Gene ID format in proteomics data"/>
+            </when>
+            <when value="false"/>
+        </conditional>
+        <!-- Metabolomics layer: the input table and the metabolite ID format
+             are only requested when the selector is set to Enabled ("true"). -->
+        <conditional name="metabolomics_data">
+            <param name="selector" type="select" label="Select metabolomics data">
+                <option value="true">Enabled</option>
+                <option value="false">Disabled</option>
+            </param>
+            <when value="true">
+                <param name="metabolomics" type="data" format="tabular" label="Metabolomics data"
+                    help="Tabular file with the columns Symbol (feature ID), logFC (log2 fold change) and pValue." />
+                <param name="metabolome_ids" type="select" label="Metabolite ID format"
+                    help="Identifier type used for the metabolites in the metabolomics data.">
+                    <option value="HMDB">HMDB</option>
+                    <option value="CAS">CAS</option>
+                    <option value="DTXCID">DTXCID</option>
+                    <option value="DTXSID">DTXSID</option>
+                    <option value="SID">SID</option>
+                    <option value="CID">CID</option>
+                    <option value="ChEBI">ChEBI</option>
+                    <option value="KEGG">KEGG</option>
+                    <option value="Drugbank">Drugbank</option>
+                </param>
+            </when>
+            <when value="false"/>
+        </conditional>
+        <!-- Organism whose pathway definitions are used; the option value is
+             passed unchanged to the organism argument of the script. -->
+        <param name="organism" type="select" label="Supported organisms">
+            <option value="hsapiens">Homo sapiens (Human)</option>
+            <option value="mmusculus">Mus musculus (Mouse)</option>
+            <option value="rnorvegicus">Rattus norvegicus (Rat)</option>
+            <option value="cfamiliaris">Canis lupus familiaris (Dog)</option>
+            <option value="btaurus">Bos taurus (Cow)</option>
+            <option value="sscrofa">Sus scrofa (Pig)</option>
+            <option value="ggallus">Gallus gallus (Chicken)</option>
+            <option value="xlaevis">Xenopus laevis (Frog)</option>
+            <option value="drerio">Danio rerio (Zebrafish)</option>
+            <option value="dmelanogaster">Drosophila melanogaster (Fruit fly)</option>
+            <option value="celegans">Caenorhabditis elegans (Roundworm)</option>
+        </param>
+        <!-- Multi-select of pathway databases; "all" is preselected. -->
+        <param name="databases" type="select" multiple="true" label="Pathway databases"
+            help="Pathway databases to query. Which databases are available depends on the selected organism (see the Databases section of the help below).">
+            <option value="all" selected="true">All available databases</option>
+            <option value="kegg">KEGG</option>
+            <option value="reactome">REACTOME</option>
+            <option value="wikipathways">WIKIPATHWAYS</option>
+            <option value="pathbank">PATHBANK</option>
+            <option value="smpdb">SMPDB (Human only)</option>
+            <option value="panther">PANTHER (Human only)</option>
+            <option value="pharmgkb">PHARMGKB (Human only)</option>
+        </param>
+        <!-- Method used to aggregate the per-layer p-values; no option is
+             marked selected, so the first one (Stouffer) is the default. -->
+        <param name="combine_pvalues" type="select" label="Combine p-values method"
+            help="Method used to combine the p-values of the individual omics layers.">
+            <option value="stouffer">Stouffer</option>
+            <option value="fisher">Fisher</option>
+            <option value="edgington">Edgington</option>
+        </param>
+        <!-- Multiple-testing correction method; BH is preselected. The help
+             text is kept on one line so no line break or indentation ends up
+             inside the attribute value. -->
+        <param name="padj_method" type="select" label="P-values correction method"
+            help="Multiple testing correction adjusts p-values derived from multiple statistical tests to control the occurrence of false positives.">
+            <option value="holm">Holm</option>
+            <option value="hochberg">Hochberg</option>
+            <option value="hommel">Hommel</option>
+            <option value="bonferroni">Bonferroni</option>
+            <option value="BH" selected="true">BH</option>
+            <option value="BY">BY</option>
+        </param>
+    </inputs>
+    <!-- Single tabular output, collected from results.tsv in the job working directory. -->
+    <outputs>
+        <data name="output"
+              format="tabular"
+              from_work_dir="results.tsv"
+              label="${tool.name} on ${on_string}: pathway enrichment" />
+    </outputs>
+    <tests>
+        <!-- Test with transcriptomics data supplied explicitly (human, KEGG,
+             Stouffer, Holm).
+             NOTE(review): the proteomics and metabolomics selectors are not set
+             here and their first option is "true" (Enabled); confirm that this
+             test really exercises a transcriptomics-only run. -->
+        <test expect_num_outputs="1">
+            <conditional name="transcriptomics_data">
+                <param name="selector" value="true"/>
+                <param name="transcriptomics" value="transcriptome.tsv"/>
+                <param name="transcriptome_ids" value="SYMBOL"/>
+            </conditional>
+            <param name="organism" value="hsapiens"/>
+            <param name="databases" value="kegg"/>
+            <param name="combine_pvalues" value="stouffer"/>
+            <param name="padj_method" value="holm"/>
+            <output name="output">
+                <assert_contents>
+                    <has_size value="43574" delta="300"/>
+                    <has_text text="Ubiquinone and other terpenoid-quinone biosynthesis"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test with all three omics layers enabled (human, KEGG, Stouffer, Holm). -->
+        <test expect_num_outputs="1">
+            <conditional name="transcriptomics_data">
+                <param name="selector" value="true"/>
+                <param name="transcriptomics" value="transcriptome.tsv"/>
+                <param name="transcriptome_ids" value="SYMBOL"/>
+            </conditional>
+            <conditional name="proteomics_data">
+                <param name="selector" value="true"/>
+                <param name="proteomics" value="proteome.tsv"/>
+                <param name="proteome_ids" value="SYMBOL"/>
+            </conditional>
+            <conditional name="metabolomics_data">
+                <param name="selector" value="true"/>
+                <param name="metabolomics" value="metabolome.tsv"/>
+                <param name="metabolome_ids" value="HMDB"/>
+            </conditional>
+            <param name="organism" value="hsapiens"/>
+            <param name="databases" value="kegg"/>
+            <param name="combine_pvalues" value="stouffer"/>
+            <param name="padj_method" value="holm"/>
+            <output name="output">
+                <assert_contents>
+                    <has_size value="42541" delta="300"/>
+                    <has_text text="Ubiquinone and other terpenoid-quinone biosynthesis"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+Purpose
+=======
+
+multiGSEA performs robust GSEA-based pathway enrichment for
+multiple omics layers. The enrichment is calculated for each omics layer
+separately and aggregated p-values are calculated afterwards to derive a
+composite multi-omics pathway enrichment.
+
+Input requirements
+==================
+
+``multiGSEA`` can be applied with up to three different omics layers. In
+principle, the input format is similar between those layers, containing
+the feature IDs, the log2 fold change, and the p-Value.
+
+The columns have to be named as follows:
+
+::
+
+   - Symbol (feature ID)
+   - logFC  (log2 fold change)
+   - pValue 
+
+Two example omics data sets are shown below:
+
+**Transcriptomics input data**
+
+================== ========= ============
+Symbol             logFC     pValue
+================== ========= ============
+ENSRNOG00000009450 -3.447792 1.063839e-24
+ENSRNOG00000011858 -2.604610 4.928870e-36
+ENSRNOG00000005438 -2.743588 8.085929e-15
+ENSRNOG00000005697 -3.575947 5.721265e-34
+ENSRNOG00000011130 -2.507097 2.931514e-11
+ENSRNOG00000002265 -2.647413 9.085615e-26
+================== ========= ============
+
+**Proteomics input data**
+
+======== ====== ============
+Symbol   logFC  pValue
+======== ====== ============
+B1WBW4   -4.080 6.027171e-04
+B2RYC9   -2.860 2.937084e-06
+F1LPV8   3.370  2.930764e-13
+F1LR66   5.310  3.580927e-16
+P06685   5.030  1.890405e-18
+P06761   0.324  4.833296e-01
+======== ====== ============
+
+Organisms
+=========
+
+``multiGSEA`` can be applied to 11 model organisms:
+
+-  *Homo sapiens* (hsapiens)
+-  *Mus musculus* (mmusculus)
+-  *Rattus norvegicus* (rnorvegicus)
+-  *Canis familiaris* (cfamiliaris)
+-  *Sus scrofa* (sscrofa)
+-  *Bos taurus* (btaurus)
+-  *Danio rerio* (drerio)
+-  *Gallus gallus* (ggallus)
+-  *Xenopus laevis* (xlaevis)
+-  *Caenorhabditis elegans* (celegans)
+-  *Drosophila melanogaster* (dmelanogaster)
+
+Databases
+=========
+
+Depending on the selected organism, several pathway databases can be
+queried.
+
+**H.sapiens**
+
+ - kegg, reactome, wikipathways, panther, pathbank, pharmgkb, smpdb
+
+**M.musculus, R.norvegicus, B.taurus, C.elegans, D.melanogaster**
+
+ - kegg, reactome, pathbank, wikipathways
+
+**C.familiaris, S.scrofa, D.rerio, G.gallus**
+
+ - kegg, reactome, wikipathways
+
+**X.laevis**
+
+ - kegg
+
+Combining p-values
+==================
+
+multiGSEA provides three different methods to aggregate p-values. These
+methods differ in how they weight small and large
+p-values. By default, combinePvalues will apply the Z-method or
+Stouffer’s method (Stouffer *et al.*, 1949) which has no bias towards
+small or large p-values. The widely used Fisher’s combined probability
+test (Fisher, 1932) can also be applied but is known for its bias
+towards small p-values. Edgington’s method goes the opposite direction
+by favoring large p-values (Edgington, 1972).
+
+
+Output format
+=============
+
+The calculated pathway enrichments are sorted by their combined adjusted p-values. For each individual pathway, the single omics p-Value and adjusted p-Values are collected as well as the combined p-Value and adjusted p-Value.
+
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>