Mercurial > repos > iuc > multigsea
view multigsea.xml @ 1:e48b10ce08b8 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 945cc63f011002e3f61d7e848d556b647e9c8878
author | iuc |
---|---|
date | Wed, 21 Feb 2024 15:41:52 +0000 |
parents | 28e29a3d0eda |
children |
line wrap: on
line source
<!--
    Galaxy wrapper for multiGSEA.
    Runs multiGSEA.R (shipped in the tool directory) on up to three omics
    layers and collects the file results.tsv from the working directory.
    The @TOOL_VERSION@, @SUFFIX_VERSION@ and @PROFILE@ tokens as well as the
    xrefs, requirements, macro_IDs and citations macros come from macros.xml.
-->
<tool id="multigsea" name="multiGSEA" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE@">
    <description>GSEA-based pathway enrichment analysis for multi-omics data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <!-- Any of these patterns on stdout or stderr marks the job as failed. -->
    <stdio>
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <regex match="Fatal error"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <!--
        An omics layer is only passed to the script when its selector is
        enabled. Parameters nested in a conditional are always referenced
        through the name of that conditional.
    -->
    <command><![CDATA[
        Rscript '${__tool_directory__}/multiGSEA.R'
        #if $transcriptomics_data.selector == "true"
            --transcriptomics '${transcriptomics_data.transcriptomics}'
            --transcriptome_ids $transcriptomics_data.transcriptome_ids
        #end if
        #if $proteomics_data.selector == "true"
            --proteomics '${proteomics_data.proteomics}'
            --proteome_ids $proteomics_data.proteome_ids
        #end if
        #if $metabolomics_data.selector == "true"
            --metabolomics '${metabolomics_data.metabolomics}'
            --metabolome_ids $metabolomics_data.metabolome_ids
        #end if
        --organism $organism
        --databases $databases
        --combine_pvalues $combine_pvalues
        --padj_method $padj_method
    ]]></command>
    <inputs>
        <conditional name="transcriptomics_data">
            <param name="selector" type="select" label="Select transcriptomics data">
                <option value="true">Enabled</option>
                <option value="false">Disabled</option>
            </param>
            <when value="true">
                <param name="transcriptomics"
                       type="data"
                       format="tabular"
                       label="Transcriptomics data"
                       help="Tabular file with the columns Symbol (feature ID), logFC (log2 fold change) and pValue." />
                <expand macro="macro_IDs" name="transcriptome_ids" label="Gene ID format in transcriptomics data"/>
            </when>
            <when value="false"/>
        </conditional>
        <conditional name="proteomics_data">
            <param name="selector" type="select" label="Select proteomics data">
                <option value="true">Enabled</option>
                <option value="false">Disabled</option>
            </param>
            <when value="true">
                <param name="proteomics"
                       type="data"
                       format="tabular"
                       label="Proteomics data"
                       help="Tabular file with the columns Symbol (feature ID), logFC (log2 fold change) and pValue." />
                <expand macro="macro_IDs" name="proteome_ids" label="Gene ID format in proteomics data"/>
            </when>
            <when value="false"/>
        </conditional>
        <conditional name="metabolomics_data">
            <param name="selector" type="select" label="Select metabolomics data">
                <option value="true">Enabled</option>
                <option value="false">Disabled</option>
            </param>
            <when value="true">
                <param name="metabolomics"
                       type="data"
                       format="tabular"
                       label="Metabolomics data"
                       help="Tabular file with the columns Symbol (feature ID), logFC (log2 fold change) and pValue." />
                <param name="metabolome_ids"
                       type="select"
                       label="Metabolite ID format"
                       help="String specifying the returned metabolite ID format.">
                    <option value="HMDB">HMDB</option>
                    <option value="CAS">CAS</option>
                    <option value="DTXCID">DTXCID</option>
                    <option value="DTXSID">DTXSID</option>
                    <option value="SID">SID</option>
                    <option value="CID">CID</option>
                    <option value="ChEBI">ChEBI</option>
                    <option value="KEGG">KEGG</option>
                    <option value="Drugbank">Drugbank</option>
                </param>
            </when>
            <when value="false"/>
        </conditional>
        <param name="organism" type="select" label="Supported organisms">
            <option value="hsapiens">Homo sapiens (Human)</option>
            <option value="mmusculus">Mus musculus (Mouse)</option>
            <option value="rnorvegicus">Rattus norvegicus (Rat)</option>
            <option value="cfamiliaris">Canis lupus familiaris (Dog)</option>
            <option value="btaurus">Bos taurus (Cow)</option>
            <option value="sscrofa">Sus scrofa (Pig)</option>
            <option value="ggallus">Gallus gallus (Chicken)</option>
            <option value="xlaevis">Xenopus laevis (Frog)</option>
            <option value="drerio">Danio rerio (Zebrafish)</option>
            <option value="dmelanogaster">Drosophila melanogaster (Fruit fly)</option>
            <option value="celegans">Caenorhabditis elegans (Roundworm)</option>
        </param>
        <param name="databases"
               type="select"
               multiple="true"
               label="Pathway databases"
               help="Available pathway databases">
            <option value="all" selected="true">All available databases</option>
            <option value="kegg">KEGG</option>
            <option value="reactome">REACTOME</option>
            <option value="wikipathways">WIKIPATHWAYS</option>
            <option value="pathbank">PATHBANK</option>
            <option value="smpdb">SMPDB (Human only)</option>
            <option value="panther">PANTHER (Human only)</option>
            <option value="pharmgkb">PHARMGKB (Human only)</option>
        </param>
        <param name="combine_pvalues"
               type="select"
               label="Combine p-values method"
               help="It specifies the method to combine multiple p-values">
            <option value="stouffer">Stouffer</option>
            <option value="fisher">Fisher</option>
            <option value="edgington">Edgington</option>
        </param>
        <param name="padj_method"
               type="select"
               label="P-values correction method"
               help="Multiple testing corrections adjust p-values derived from multiple statistical tests to correct for occurrence of false positives">
            <option value="holm">Holm</option>
            <option value="hochberg">Hochberg</option>
            <option value="hommel">Hommel</option>
            <option value="bonferroni">Bonferroni</option>
            <option value="BH" selected="true">BH</option>
            <option value="BY">BY</option>
        </param>
    </inputs>
    <outputs>
        <data name="output"
              format="tabular"
              from_work_dir="results.tsv"
              label="${tool.name} on ${on_string}: pathway enrichment"/>
    </outputs>
    <tests>
        <!-- Test only with transcriptomics data -->
        <test expect_num_outputs="1">
            <param name="organism" value="hsapiens"/>
            <param name="databases" value="kegg"/>
            <param name="combine_pvalues" value="stouffer"/>
            <param name="padj_method" value="holm"/>
            <conditional name="transcriptomics_data">
                <param name="selector" value="true"/>
                <param name="transcriptomics" value="transcriptome.tsv"/>
                <param name="transcriptome_ids" value="SYMBOL"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_size value="43574" delta="300"/>
                    <has_n_lines n="327"/>
                    <has_n_columns n="9"/>
                    <has_text text="Ubiquinone and other terpenoid-quinone biosynthesis"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test all inputs -->
        <test expect_num_outputs="1">
            <param name="organism" value="hsapiens"/>
            <param name="databases" value="kegg"/>
            <param name="combine_pvalues" value="stouffer"/>
            <param name="padj_method" value="holm"/>
            <conditional name="transcriptomics_data">
                <param name="selector" value="true"/>
                <param name="transcriptomics" value="transcriptome.tsv"/>
                <param name="transcriptome_ids" value="SYMBOL"/>
            </conditional>
            <conditional name="proteomics_data">
                <param name="selector" value="true"/>
                <param name="proteomics" value="proteome.tsv"/>
                <param name="proteome_ids" value="SYMBOL"/>
            </conditional>
            <conditional name="metabolomics_data">
                <param name="selector" value="true"/>
                <param name="metabolomics" value="metabolome.tsv"/>
                <param name="metabolome_ids" value="HMDB"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_n_lines n="327"/>
                    <has_n_columns n="9"/>
                    <has_text text="Ubiquinone and other terpenoid-quinone biosynthesis"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

Purpose
=======

The multiGSEA tool performs robust GSEA-based pathway enrichment for multiple omics layers. The enrichment is calculated for each omics layer separately and aggregated p-values are calculated afterwards to derive a composite multi-omics pathway enrichment.

Input requirements
==================

``multiGSEA`` can be applied with up to three different omics layers. In principle, the input format is similar between those layers, containing the feature IDs, the log2 fold change, and the p-Value. The columns have to be named as follows:

::

    - Symbol (feature ID)
    - logFC (log2 fold change)
    - pValue

Two example omics data sets are shown below:

**Transcriptomics input data**

================== ========= ============
Symbol             logFC     pValue
================== ========= ============
ENSRNOG00000009450 -3.447792 1.063839e-24
ENSRNOG00000011858 -2.604610 4.928870e-36
ENSRNOG00000005438 -2.743588 8.085929e-15
ENSRNOG00000005697 -3.575947 5.721265e-34
ENSRNOG00000011130 -2.507097 2.931514e-11
ENSRNOG00000002265 -2.647413 9.085615e-26
================== ========= ============

**Proteomics input data**

======== ====== ============
Symbol   logFC  pValue
======== ====== ============
B1WBW4   -4.080 6.027171e-04
B2RYC9   -2.860 2.937084e-06
F1LPV8   3.370  2.930764e-13
F1LR66   5.310  3.580927e-16
P06685   5.030  1.890405e-18
P06761   0.324  4.833296e-01
======== ====== ============

Organisms
=========

``multiGSEA`` can be applied to 11 model organisms:

- *Homo sapiens* (hsapiens)
- *Mus musculus* (mmusculus)
- *Rattus norvegicus* (rnorvegicus)
- *Canis familiaris* (cfamiliaris)
- *Sus scrofa* (sscrofa)
- *Bos taurus* (btaurus)
- *Danio rerio* (drerio)
- *Gallus gallus* (ggallus)
- *Xenopus laevis* (xlaevis)
- *Caenorhabditis elegans* (celegans)
- *Drosophila melanogaster* (dmelanogaster)

Databases
=========

Depending on the selected organism, several pathway databases can be queried.

**H.sapiens**

- kegg, reactome, wikipathways, panther, pathbank, pharmgkb, smpdb

**M.musculus, R.norvegicus, B.taurus, C.elegans, D.melanogaster**

- kegg, reactome, pathbank, wikipathways

**C.familiaris, S.scrofa, D.rerio, G.gallus**

- kegg, reactome, wikipathways

**X.laevis**

- kegg

Combining p-values
==================

multiGSEA provides three different methods to aggregate p-values. These methods differ in the way they weight either small or large p-values. By default, combinePvalues will apply the Z-method or Stouffer’s method (Stouffer *et al.*, 1949) which has no bias towards small or large p-values. The widely used Fisher’s combined probability test (Fisher, 1932) can also be applied but is known for its bias towards small p-values. Edgington’s method goes the opposite direction by favoring large p-values (Edgington, 1972).

Output format
=============

The calculated pathway enrichment is sorted based on their combined adjusted p-values. For each individual pathway, the single omics p-Value and adjusted p-Values are collected as well as the combined p-Value and adjusted p-Value.

    ]]></help>
    <expand macro="citations" />
</tool>