annotate topGO.xml @ 0:aade04e750fa draft default tip

planemo upload
author lnguyen
date Fri, 15 Sep 2017 10:38:28 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
1 <tool id="topGO" name="topGO enrichment analysis" version="0.1.0">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
2 <requirements>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
3 </requirements>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
4 <stdio>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
5 <exit_code range="1:" />
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
6 </stdio>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
7 <command><![CDATA[
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
8
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
9 #if $inputtype.filetype == "file_all":
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
10 Rscript --vanilla '$__tool_directory__/enrichment_v3.R' --inputtype tabfile --input '$inputtype.genelist' --ontology '$ontocat' --option '$option' --threshold '$threshold' --correction '$correction' --textoutput '$condtext.textoutput' --barplotoutput '$condbar.barplotoutput' --dotplotoutput '$conddot.dotplotoutput' --column '$inputtype.column' --geneuniverse '$geneuniverse' --header '$inputtype.header'
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
11 #end if
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
12
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
13
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
14 #if $inputtype.filetype == "copy_paste":
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
15 Rscript --vanilla '$__tool_directory__/enrichment_v3.R' --inputtype copypaste --input '$inputtype.genelist' --ontology '$ontocat' --option '$option' --threshold '$threshold' --correction '$correction' --textoutput '$condtext.textoutput' --barplotoutput '$condbar.barplotoutput' --dotplotoutput '$conddot.dotplotoutput' --column c1 --geneuniverse '$geneuniverse' --header None
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
16
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
17 #end if
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
18
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
19
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
20
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
21 ]]></command>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
22
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
23 <inputs>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
24 <conditional name="inputtype">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
25 <param name="filetype" type="select" label="Select your type of input file" help="The identifiers must be Ensembl gene IDs (e.g : ENSG00000139618). If it is not the case, please use the ID Mapping tool.">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
26 <option value="file_all">Input file containing your identifiers</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
27 <option value="copy_paste">Copy/paste your list of IDs</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
28 </param>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
29 <when value="copy_paste">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
30 <param name="genelist" type="text" label="Enter a list of identifiers"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
31 </when>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
32 <when value="file_all">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
33 <param name="genelist" type="data" format="txt,tabular" label="Choose an input file" help="This file must imperatively have 1 column filled with IDs consistent with the database that will be used. Please use the MappingIDs component if this is not the case."/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
34 <param name="column" type="text" label="Please specify the column where you would like to apply the comparison (e.g : Enter c1)" value="c1"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
35
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
36 <param name="header" type="select" label="Does your file have a header?" multiple="false" optional="false">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
37 <option value="TRUE" selected="true">Yes</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
38 <option value="FALSE" selected="false">No</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
39 </param>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
40 </when>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
41 </conditional>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
42 <param name="geneuniverse" type="select" label="Select a species"> <!-- option values are R organism annotation package names, passed to the script through the geneuniverse argument -->
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
43 <option value="org.At.tair.db" >Arabidopsis</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
44 <option value="org.Ce.eg.db" >C.elegans</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
45 <option value="org.Dm.eg.db" >Fly</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
46 <option value="org.Hs.eg.db" selected="true">Human</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
47 <option value="org.Mm.eg.db" >Mouse</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
48 <option value="org.Sc.sgd.db" >Yeast</option> <!-- the yeast package is org.Sc.sgd.db (SGD), not "sqd" -->
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
49 </param>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
50
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
51 <param name="ontocat" type="select" label="Ontology category">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
52 <option value="BP" >Biological Process</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
53 <option value="CC" >Cellular Component</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
54 <option value="MF" >Molecular Function</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
55 </param>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
56
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
57 <param name="option" type="select" label="Choose the topGO option for your analysis">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
58 <option value="classic" >Classic fisher test</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
59 <option value="elim" selected="true">Elim</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
60 <option value="weight01" >Weight01</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
61 <option value="parentchild" >ParentChild</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
62 </param>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
63 <param name="threshold" type="text" label="Enter the p-value threshold level under the form 1e-level wanted (e.g : 1e-3)" value="1e-3"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
64 <param name="correction" label="Choose a correction for multiple testing" type="select">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
65 <option value="none" >None</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
66 <option value="holm">Holm correction</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
67 <option value="hochberg" >Hochberg correction</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
68 <option value="hommel" >Hommel correction</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
69 <option value="bonferroni" >Bonferroni correction</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
70 <option value="BH" selected="true">Benjamini and Hochberg</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
71 <option value="BY" >Benjamini and Yekutieli</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
72 <option value="fdr" >FDR</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
73 </param>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
74 <conditional name="condtext">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
75 <param name="textoutput" type="select" label="Generate a text file for results">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
76 <option value="TRUE">Yes</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
77 <option value="FALSE">No</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
78 </param>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
79 </conditional>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
80 <conditional name="condbar">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
81 <param name="barplotoutput" type="select" label="Generate a barplot of over-represented GO terms">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
82 <option value="TRUE">Yes</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
83 <option value="FALSE">No</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
84 </param>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
85 </conditional>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
86 <conditional name="conddot">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
87 <param name="dotplotoutput" type="select" label="Generate a dotplot of over-represented GO terms">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
88 <option value="TRUE">Yes</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
89 <option value="FALSE">No</option>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
90 </param>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
91 </conditional>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
92 </inputs>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
93 <outputs>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
94
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
95 <data name="outputtext" format="tabular" label="Text output for topGO analysis" from_work_dir="result.csv">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
96 <filter>condtext['textoutput']=="TRUE"</filter>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
97 </data>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
98
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
99 <data name="outputbarplot" format="png" label="Barplot output for topGO analysis" from_work_dir="barplot.png">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
100 <filter>condbar['barplotoutput']=="TRUE"</filter>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
101 </data>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
102
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
103 <data name="outputdotplot" format="png" label="Dotplot output for topGO analysis" from_work_dir="dotplot.png">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
104 <filter>conddot['dotplotoutput']=="TRUE"</filter>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
105 </data>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
106
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
107 </outputs>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
108 <help><![CDATA[
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
109
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
110
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
111 **Galaxy component based on R package topGO.**
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
112
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
113 **Input required**
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
114
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
115 This component works with Ensembl gene IDs (e.g. ENSG00000139618). You can
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
116 copy/paste these identifiers or supply a tabular file (.csv, .tsv, .txt, .tab)
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
117 that contains them.
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
118
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
119 **Principle**
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
120
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
121 This component provides the GO terms representativity of a gene list in one ontology category (Biological Process "BP", Cellular Component "CC", Molecular Function "MF"). This representativity is evaluated in comparison to the background list of all human genes associated with GO terms of the chosen category (BP, CC, MF). This background is given by the R package "org.Hs.eg.db", which is a genome-wide annotation package for **human**.
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
122
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
123 **Output**
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
124
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
125 Three kinds of output are available: a textual output, a barplot output and
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
126 a dotplot output.
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
127
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
128 *Textual output* :
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
129 The text output lists all the GO-terms that were found significant under the specified threshold.
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
130
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
131
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
132 The different fields are as follows:
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
133
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
134 - Annotated : number of genes in org.Hs.eg.db which are annotated with the GO-term.
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
135
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
136 - Significant : number of genes belonging to your input which are annotated with the GO-term.
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
137
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
138 - Expected : an estimate of the number of genes a node of size Annotated would have if the significant genes were randomly selected from the gene universe.
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
139
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
140 - pvalues : pvalue obtained after the test
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
141
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
142 - ( qvalues : additional column with adjusted pvalues )
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
143
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
144
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
145 **Tests**
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
146
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
147 topGO provides a classic Fisher test for evaluating whether some GO terms are over-represented in your gene list, but other options are also provided (elim, weight01, parentchild). For the merits of each option and their algorithmic descriptions, please refer to the topGO manual:
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
148 https://bioconductor.org/packages/release/bioc/vignettes/topGO/inst/doc/topGO.pdf
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
149
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
150 **Multiple testing corrections**
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
151
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
152 Furthermore, the following corrections for multiple testing can also be applied :
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
153 - holm
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
154 - hochberg
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
155 - hommel
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
156 - bonferroni
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
157 - BH
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
158 - BY
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
159 - fdr
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
160
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
161
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
162
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
163 ]]></help>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
164
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
165 <tests>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
166 <test>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
167 <conditional name="inputtype">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
168 <param name="filetype" value="file_all"/> <!-- must be one of the filetype option values; file_all selects the branch that takes genelist, column and header -->
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
169 <param name="genelist" value="prot_reactome_EGFR_mapped_ensg.txt"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
170 <param name="column" value="c1"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
171 <param name="header" value="FALSE"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
172 </conditional>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
173 <param name="ontocat" value="BP"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
174 <param name="option" value="elim"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
175 <param name="threshold" value="1e-3"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
176 <param name="correction" value="BH"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
177 <conditional name="condtext">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
178 <param name="textoutput" value="TRUE"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
179 </conditional>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
180 <conditional name="condbar">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
181 <param name="barplotoutput" value="TRUE"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
182 </conditional>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
183 <conditional name="conddot">
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
184 <param name="dotplotoutput" value="TRUE"/> <!-- name must match the selector declared in the conddot conditional -->
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
185 </conditional>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
186 <param name="geneuniverse" value="org.Hs.eg.db"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
187 <output name="outputtext" file="Text_output_for_topGO_analysis.txt"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
188 <output name="outputbarplot" file="Barplot_output_for_topGO_analysis.png"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
189 <output name="outputdotplot" file="Dotplot_output_for_topGO_analysis.png"/>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
190 </test>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
191 </tests>
aade04e750fa planemo upload
lnguyen
parents:
diff changeset
192 </tool>