comparison multigsea.xml @ 0:28e29a3d0eda draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 5c1b8a2b105a80e236f88e71a743147d79925ac4
author iuc
date Wed, 07 Jun 2023 19:48:50 +0000
parents
children e48b10ce08b8
comparison
equal deleted inserted replaced
-1:000000000000 0:28e29a3d0eda
1 <tool id="multigsea" name="multiGSEA" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE@">
2 <description>GSEA-based pathway enrichment analysis for multi-omics data</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro='xrefs'/>
7 <expand macro='requirements'/>
8 <stdio> <!-- Mark the job as failed (level "fatal") when any of these patterns is matched; each rule uses source="both" -->
9 <regex match="Execution halted"
10 source="both"
11 level="fatal"
12 description="Execution halted." />
13 <regex match="Error in"
14 source="both"
15 level="fatal"
16 description="An undefined error occurred, please check your input carefully and contact your administrator." />
17 <regex match="Fatal error"
18 source="both"
19 level="fatal"
20 description="An undefined error occurred, please check your input carefully and contact your administrator." />
21 </stdio>
22 <command><![CDATA[
23 ## Each omics layer is passed to the script only when its conditional selector is "true"; nested parameters are referenced through their conditional.
24 Rscript '${__tool_directory__}/multiGSEA.R'
25 #if $transcriptomics_data.selector == "true"
26 --transcriptomics '${transcriptomics_data.transcriptomics}'
27 --transcriptome_ids $transcriptomics_data.transcriptome_ids
28 #end if
29 #if $proteomics_data.selector == "true"
30 --proteomics '${proteomics_data.proteomics}'
31 --proteome_ids $proteomics_data.proteome_ids
32 #end if
33 #if $metabolomics_data.selector == "true"
34 --metabolomics '${metabolomics_data.metabolomics}'
35 --metabolome_ids $metabolomics_data.metabolome_ids
36 #end if
37 --organism $organism
38 --databases $databases
39 --combine_pvalues $combine_pvalues
40 --padj_method $padj_method
41
42 ]]></command>
43 <inputs>
44 <conditional name="transcriptomics_data"> <!-- optional transcriptomics layer; the dataset and ID format are only requested when enabled -->
45 <param name="selector" type="select" label="Select transcriptomics data">
46 <option value="true">Enabled</option>
47 <option value="false">Disabled</option>
48 </param>
49 <when value="true">
50 <param name="transcriptomics" type="data" format="tabular" label="Transcriptomics data"
51 help="Tabular file with the columns Symbol (feature ID), logFC (log2 fold change) and pValue." />
52 <expand macro="macro_IDs" name="transcriptome_ids" label="Gene ID format in transcriptomics data"/>
53 </when>
54 <when value="false"/>
55 </conditional>
56 <conditional name="proteomics_data"> <!-- optional proteomics layer; the dataset and ID format are only requested when enabled -->
57 <param name="selector" type="select" label="Select proteomics data">
58 <option value="true">Enabled</option>
59 <option value="false">Disabled</option>
60 </param>
61 <when value="true">
62 <param name="proteomics" type="data" format="tabular" label="Proteomics data"
63 help="Tabular file with the columns Symbol (feature ID), logFC (log2 fold change) and pValue." />
64 <expand macro="macro_IDs" name="proteome_ids" label="Gene ID format in proteomics data"/>
65 </when>
66 <when value="false"/>
67 </conditional>
68 <conditional name="metabolomics_data"> <!-- optional metabolomics layer; the dataset and ID format are only requested when enabled -->
69 <param name="selector" type="select" label="Select metabolomics data">
70 <option value="true">Enabled</option>
71 <option value="false">Disabled</option>
72 </param>
73 <when value="true">
74 <param name="metabolomics" type="data" format="tabular" label="Metabolomics data"
75 help="Tabular file with the columns Symbol (feature ID), logFC (log2 fold change) and pValue." />
76 <param name="metabolome_ids" type="select" label="Metabolite ID format"
77 help="ID format used for the metabolites in the metabolomics data.">
78 <option value="HMDB">HMDB</option>
79 <option value="CAS">CAS</option>
80 <option value="DTXCID">DTXCID</option>
81 <option value="DTXSID">DTXSID</option>
82 <option value="SID">SID</option>
83 <option value="CID">CID</option>
84 <option value="ChEBI">ChEBI</option>
85 <option value="KEGG">KEGG</option>
86 <option value="Drugbank">Drugbank</option>
87 </param>
88 </when>
89 <when value="false"/>
90 </conditional>
91 <param name="organism" type="select" label="Supported organisms"> <!-- the selected option value is passed to the script via the organism option of the command -->
92 <option value="hsapiens">Homo sapiens (Human)</option>
93 <option value="mmusculus">Mus musculus (Mouse)</option>
94 <option value="rnorvegicus">Rattus norvegicus (Rat)</option>
95 <option value="cfamiliaris">Canis lupus familiaris (Dog)</option>
96 <option value="btaurus">Bos taurus (Cow)</option>
97 <option value="sscrofa">Sus scrofa (Pig)</option>
98 <option value="ggallus">Gallus gallus (Chicken)</option>
99 <option value="xlaevis">Xenopus laevis (Frog)</option>
100 <option value="drerio">Danio rerio (Zebrafish)</option>
101 <option value="dmelanogaster">Drosophila melanogaster (Fruit fly)</option>
102 <option value="celegans">Caenorhabditis elegans (Roundworm)</option>
103 </param>
104 <param name="databases" type="select" multiple="true" label="Pathway databases" help="Available pathway databases"> <!-- several databases may be selected at once; "all" is preselected -->
105 <option value="all" selected="true">All available databases</option>
106 <option value="kegg">KEGG</option>
107 <option value="reactome">REACTOME</option>
108 <option value="wikipathways">WIKIPATHWAYS</option>
109 <option value="pathbank">PATHBANK</option>
110 <option value="smpdb">SMPDB (Human only)</option>
111 <option value="panther">PANTHER (Human only)</option>
112 <option value="pharmgkb">PHARMGKB (Human only)</option>
113 </param>
114 <param name="combine_pvalues" type="select" label="Combine p-values method" help="Method used to combine the p-values of the individual omics layers."> <!-- the first option (Stouffer) is the default, since none is marked as selected -->
115 <option value="stouffer">Stouffer</option>
116 <option value="fisher">Fisher</option>
117 <option value="edgington">Edgington</option>
118 </param>
119 <param name="padj_method" type="select" label="P-values correction method"
120 help="Multiple testing corrections adjust p-values derived from multiple statistical tests to correct for the occurrence of false positives."> <!-- BH is preselected -->
121 <option value="holm">Holm</option>
122 <option value="hochberg">Hochberg</option>
123 <option value="hommel">Hommel</option>
124 <option value="bonferroni">Bonferroni</option>
125 <option value="BH" selected="true">BH</option>
126 <option value="BY">BY</option>
127 </param>
128 </inputs>
129 <outputs> <!-- single tabular dataset, collected from results.tsv in the job working directory -->
130 <data name="output" format="tabular" from_work_dir="results.tsv" label="${tool.name} on ${on_string}: pathway enrichment"/>
131 </outputs>
132 <tests>
133 <!-- Transcriptomics layer only (human, KEGG): the output is checked by approximate size and by the presence of a known pathway name -->
134 <test expect_num_outputs="1">
135 <param name="organism" value="hsapiens"/>
136 <param name="databases" value="kegg"/>
137 <param name="combine_pvalues" value="stouffer"/>
138 <param name="padj_method" value="holm"/>
139 <conditional name="transcriptomics_data">
140 <param name="selector" value="true"/>
141 <param name="transcriptomics" value="transcriptome.tsv"/>
142 <param name="transcriptome_ids" value="SYMBOL"/>
143 </conditional>
144 <output name="output">
145 <assert_contents>
146 <has_size value="43574" delta="300"/>
147 <has_text text="Ubiquinone and other terpenoid-quinone biosynthesis"/>
148 </assert_contents>
149 </output>
150 </test>
151 <!-- All three omics layers enabled (transcriptomics, proteomics, metabolomics) with the same organism, database and statistics settings -->
152 <test expect_num_outputs="1">
153 <param name="organism" value="hsapiens"/>
154 <param name="databases" value="kegg"/>
155 <param name="combine_pvalues" value="stouffer"/>
156 <param name="padj_method" value="holm"/>
157 <conditional name="transcriptomics_data">
158 <param name="selector" value="true"/>
159 <param name="transcriptomics" value="transcriptome.tsv"/>
160 <param name="transcriptome_ids" value="SYMBOL"/>
161 </conditional>
162 <conditional name="proteomics_data">
163 <param name="selector" value="true"/>
164 <param name="proteomics" value="proteome.tsv"/>
165 <param name="proteome_ids" value="SYMBOL"/>
166 </conditional>
167 <conditional name="metabolomics_data">
168 <param name="selector" value="true"/>
169 <param name="metabolomics" value="metabolome.tsv"/>
170 <param name="metabolome_ids" value="HMDB"/>
171 </conditional>
172 <output name="output">
173 <assert_contents>
174 <has_size value="42541" delta="300"/>
175 <has_text text="Ubiquinone and other terpenoid-quinone biosynthesis"/>
176 </assert_contents>
177 </output>
178 </test>
179 </tests>
180 <help><![CDATA[
181
182 .. class:: infomark
183
184 Purpose
185 =======
186
187 multiGSEA performs robust GSEA-based pathway enrichment for
188 multiple omics layers. The enrichment is calculated for each omics layer
189 separately and aggregated p-values are calculated afterwards to derive a
190 composite multi-omics pathway enrichment.
191
192 Input requirements
193 ==================
194
195 ``multiGSEA`` can be applied with up to three different omics layers. In
196 principle, the input format is similar between those layers, containing
197 the feature IDs, the log2 fold change, and the p-Value.
198
199 The columns have to be named as follows:
200
201 ::
202
203 - Symbol (feature ID)
204 - logFC (log2 fold change)
205 - pValue
206
207 Two example omics data sets are shown below:
208
209 **Transcriptomics input data**
210
211 ================== ========= ============
212 Symbol logFC pValue
213 ================== ========= ============
214 ENSRNOG00000009450 -3.447792 1.063839e-24
215 ENSRNOG00000011858 -2.604610 4.928870e-36
216 ENSRNOG00000005438 -2.743588 8.085929e-15
217 ENSRNOG00000005697 -3.575947 5.721265e-34
218 ENSRNOG00000011130 -2.507097 2.931514e-11
219 ENSRNOG00000002265 -2.647413 9.085615e-26
220 ================== ========= ============
221
222 **Proteomics input data**
223
224 ======== ====== ============
225 Symbol logFC pValue
226 ======== ====== ============
227 B1WBW4 -4.080 6.027171e-04
228 B2RYC9 -2.860 2.937084e-06
229 F1LPV8 3.370 2.930764e-13
230 F1LR66 5.310 3.580927e-16
231 P06685 5.030 1.890405e-18
232 P06761 0.324 4.833296e-01
233 ======== ====== ============
234
235 Organisms
236 =========
237
238 ``multiGSEA`` can be applied to 11 model organisms:
239
240 - *Homo sapiens* (hsapiens)
241 - *Mus musculus* (mmusculus)
242 - *Rattus norvegicus* (rnorvegicus)
243 - *Canis familiaris* (cfamiliaris)
244 - *Sus scrofa* (sscrofa)
245 - *Bos taurus* (btaurus)
246 - *Danio rerio* (drerio)
247 - *Gallus gallus* (ggallus)
248 - *Xenopus laevis* (xlaevis)
249 - *Caenorhabditis elegans* (celegans)
250 - *Drosophila melanogaster* (dmelanogaster)
251
252 Databases
253 =========
254
255 Depending on the selected organism, several pathway databases can be
256 queried.
257
258 **H.sapiens**
259
260 - kegg, reactome, wikipathways, panther, pathbank, pharmgkb, smpdb
261
262 **M.musculus, R.norvegicus, B.taurus, C.elegans, D.melanogaster**
263
264 - kegg, reactome, pathbank, wikipathways
265
266 **C.familiaris, S.scrofa, D.rerio, G.gallus**
267
268 - kegg, reactome, wikipathways
269
270 **X.laevis**
271
272 - kegg
273
274 Combining p-values
275 ==================
276
277 multiGSEA provides three different methods to aggregate p-values. These
278 methods differ in how they weight small or large
279 p-values. By default, combinePvalues will apply the Z-method or
280 Stouffer’s method (Stouffer *et al.*, 1949) which has no bias towards
281 small or large p-values. The widely used Fisher’s combined probability
282 test (Fisher, 1932) can also be applied but is known for its bias
283 towards small p-values. Edgington’s method goes the opposite direction
284 by favoring large p-values (Edgington, 1972).
285
286
287 Output format
288 =============
289
290 The calculated pathway enrichments are sorted by their combined adjusted p-values. For each individual pathway, the single-omics p-values and adjusted p-values are collected, as well as the combined p-value and adjusted p-value.
291
292
293 ]]></help>
294 <expand macro="citations" />
295 </tool>