comparison goseq.xml @ 8:8b3e3657034e draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit 8e19f8bcaea6f607a1eaa14bb88f2d625ed63df0"
author iuc
date Fri, 06 Sep 2019 07:50:46 -0400
parents 67c29afac85f
children ef2ad746b589
comparison
equal deleted inserted replaced
7:67c29afac85f 8:8b3e3657034e
1 <tool id="goseq" name="goseq" version="1.34.0+galaxy1"> 1 <tool id="goseq" name="goseq" version="@VERSION@+@GALAXY_VERSION@">
2 <description>tests for overrepresented gene categories</description> 2 <description>tests for overrepresented gene categories</description>
3 <macros>
4 <token name="@VERSION@">1.36.0</token>
5 <token name="@GALAXY_VERSION@">galaxy0</token>
6 </macros>
3 <requirements> 7 <requirements>
4 <requirement type="package" version="1.34.0">bioconductor-goseq</requirement> 8 <requirement type="package" version="@VERSION@">bioconductor-goseq</requirement>
5 <requirement type="package" version="3.7.0">bioconductor-org.hs.eg.db</requirement> 9 <requirement type="package" version="3.8.2">bioconductor-org.hs.eg.db</requirement>
6 <requirement type="package" version="3.7.0">bioconductor-org.dm.eg.db</requirement> 10 <requirement type="package" version="3.8.2">bioconductor-org.dm.eg.db</requirement>
7 <requirement type="package" version="3.7.0">bioconductor-org.dr.eg.db</requirement> 11 <requirement type="package" version="3.8.2">bioconductor-org.dr.eg.db</requirement>
8 <requirement type="package" version="3.7.0">bioconductor-org.mm.eg.db</requirement> 12 <requirement type="package" version="3.8.2">bioconductor-org.mm.eg.db</requirement>
9 <requirement type="package" version="0.7.8">r-dplyr</requirement> 13 <requirement type="package" version="0.8.3">r-dplyr</requirement>
10 <requirement type="package" version="3.1.0">r-ggplot2</requirement> 14 <requirement type="package" version="3.2.1">r-ggplot2</requirement>
11 <requirement type="package" version="1.6.0">r-optparse</requirement> 15 <requirement type="package" version="1.6.2">r-optparse</requirement>
12 </requirements> 16 </requirements>
13 <stdio> 17 <stdio>
14 <regex match="Execution halted" 18 <regex match="Execution halted"
15 source="both" 19 source="both"
16 level="fatal" 20 level="fatal"
28 echo $(R --version | grep version | grep -v GNU)", goseq version" $(R --vanilla --slave -e "library(goseq); cat(sessionInfo()\$otherPkgs\$goseq\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Hs.eg.db version" $(R --vanilla --slave -e "library(org.Hs.eg.db); cat(sessionInfo()\$otherPkgs\$org.Hs.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dr.eg.db version" $(R --vanilla --slave -e "library(org.Dr.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dr.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dm.eg.db version" $(R --vanilla --slave -e "library(org.Dm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Mm.eg.db version" $(R --vanilla --slave -e "library(org.Mm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Mm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", dplyr version" $(R --vanilla --slave -e "library(dplyr); cat(sessionInfo()\$otherPkgs\$dplyr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", ggplot2 version" $(R --vanilla --slave -e "library(ggplot2); cat(sessionInfo()\$otherPkgs\$ggplot2\$Version)" 2> /dev/null | grep -v -i "WARNING: ") 32 echo $(R --version | grep version | grep -v GNU)", goseq version" $(R --vanilla --slave -e "library(goseq); cat(sessionInfo()\$otherPkgs\$goseq\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Hs.eg.db version" $(R --vanilla --slave -e "library(org.Hs.eg.db); cat(sessionInfo()\$otherPkgs\$org.Hs.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dr.eg.db version" $(R --vanilla --slave -e "library(org.Dr.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dr.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Dm.eg.db version" $(R --vanilla --slave -e "library(org.Dm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Dm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Mm.eg.db version" $(R --vanilla --slave -e "library(org.Mm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Mm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", dplyr version" $(R --vanilla --slave -e "library(dplyr); cat(sessionInfo()\$otherPkgs\$dplyr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", ggplot2 version" $(R --vanilla --slave -e "library(ggplot2); cat(sessionInfo()\$otherPkgs\$ggplot2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
29 ]]></version_command> 33 ]]></version_command>
30 <command><![CDATA[ 34 <command><![CDATA[
31 Rscript '$__tool_directory__/goseq.r' 35 Rscript '$__tool_directory__/goseq.r'
32 36
33 --dge_file '$dge_file' 37 --dge_file '$dge_file'
34 --length_file '$length_file' 38 --length_file '$length_file'
35 39
36 #if $categorySource.catSource == 'getgo': 40 #if $categorySource.catSource == 'getgo'
37 --genome $categorySource.genome 41 --genome $categorySource.genome
38 --gene_id $categorySource.gene_id 42 --gene_id $categorySource.gene_id
39 --fetch_cats '$categorySource.fetchcats' 43 --fetch_cats '$categorySource.fetchcats'
40 #elif $categorySource.catSource == 'history': 44 #elif $categorySource.catSource == 'history'
41 --category_file '$categorySource.category_file' 45 --category_file '$categorySource.category_file'
42 #end if 46 #end if
43 47
44 #if $methods['wallenius']: 48 #if $methods.wallenius
45 --wallenius_tab '$wallenius_tab' 49 --wallenius_tab '$wallenius_tab'
46 #end if 50 #end if
47 #if $methods['hypergeometric']: 51 #if $methods.hypergeometric
48 --nobias_tab '$nobias_tab' 52 --nobias_tab '$nobias_tab'
49 #end if 53 #end if
50 --repcnt '$methods.repcnt' 54
51 --sampling_tab '$sampling_tab' 55 --repcnt $methods.repcnt
52 56 #if $methods.repcnt != 0
53 --make_plots '$out.make_plots' 57 --sampling_tab '$sampling_tab'
54 --length_bias_plot '$length_bias_plot' 58 #end if
55 --sample_vs_wallenius_plot '$sample_vs_wallenius_plot' 59
56 60 --p_adj_method '$adv.p_adj_method'
57 --rdata '$out.rdata_out' 61 --use_genes_without_cat '$adv.use_genes_without_cat'
58 --p_adj_method '$adv.p_adj_method' 62
59 --use_genes_without_cat '$adv.use_genes_without_cat' 63 #if $out.topgo_plot
60 64 --top_plot '$top_plot'
61 #if $out.topgo_plot: 65 #end if
62 --top_plot '$out.topgo_plot' 66
63 #end if 67 #if str($out.make_plots) == 'TRUE'
64 68 --make_plots '$out.make_plots'
69 --length_bias_plot '$length_bias_plot'
70 #if $methods.repcnt != 0 and $methods.wallenius
71 --sample_vs_wallenius_plot '$sample_vs_wallenius_plot'
72 #end if
73 #end if
74
75 #if $out.cat_genes
76 --categories_genes_out_fp '$cat_genes_tab'
77 #end if
78
79 #if $out.rdata_out
80 --rdata '$rdata'
81 #end if
65 ]]></command> 82 ]]></command>
66
67 <!-- Input Files-->
68 <inputs> 83 <inputs>
84 <!-- Input Files-->
69 <param name="dge_file" type="data" format="tabular" label="Differentially expressed genes file" help="A tabular file with Gene IDs in the first column, and True or False in the second column. True means a gene is differentially expressed. See Help section for details."/> 85 <param name="dge_file" type="data" format="tabular" label="Differentially expressed genes file" help="A tabular file with Gene IDs in the first column, and True or False in the second column. True means a gene is differentially expressed. See Help section for details."/>
70 <param name="length_file" type="data" format="tabular" label="Gene lengths file" help="You can calculate the gene lengths using featureCounts or the Gene length and GC content tool."/> 86 <param name="length_file" type="data" format="tabular" label="Gene lengths file" help="You can calculate the gene lengths using featureCounts or the Gene length and GC content tool."/>
71 <conditional name="categorySource"> 87 <conditional name="categorySource">
72 <param name="catSource" type="select" format="tabular" label="Gene categories" help="You can obtain a mapping of genes to categories (for some genomes only) or you can provide your own category file."> 88 <param name="catSource" type="select" format="tabular" label="Gene categories" help="You can obtain a mapping of genes to categories (for some genomes only) or you can provide your own category file.">
73 <option value="getgo" selected="true">Get categories</option> 89 <option value="getgo" selected="true">Get categories</option>
94 </when> 110 </when>
95 <when value="history"> 111 <when value="history">
96 <param name="category_file" type="data" format="tabular" label="Gene category file"/> 112 <param name="category_file" type="data" format="tabular" label="Gene category file"/>
97 </when> 113 </when>
98 </conditional> 114 </conditional>
99
100 <!-- Method Options --> 115 <!-- Method Options -->
101 <section name="methods" title="Method Options"> 116 <section name="methods" title="Method Options">
102 <param name="wallenius" type="boolean" checked="true" label="Use Wallenius method" help="See help for details. Default: Yes" /> 117 <param name="wallenius" type="boolean" checked="true" label="Use Wallenius method" help="See help for details" />
103 <param name="hypergeometric" type="boolean" checked="false" label="Use Hypergeometric method" help="Does not use gene length information. See help for details. Default: No" /> 118 <param name="hypergeometric" type="boolean" checked="false" label="Use Hypergeometric method" help="Does not use gene length information. See help for details" />
104 <param name="repcnt" type="integer" size="3" min="0" max="10000" value="0" label="Sampling number" help="Number of random samples to be calculated when sampling is used. Set to 0 to not do sampling. Larger values take a long time. Default: 0" /> 119 <param name="repcnt" type="integer" size="3" min="0" max="10000" value="0" label="Sampling number" help="Number of random samples to be calculated when sampling is used. Set to 0 to not do sampling. Larger values take a long time" />
105 </section> 120 </section>
106
107 <!-- Output Options -->
108 <section name="out" title="Output Options">
109 <param name="topgo_plot" type="boolean" checked="false" label="Output Top GO terms plot?" help="Output a PDF plot of the Top 10 over-represented GO terms. Default: No" />
110 <param name="make_plots" type="boolean" checked="false" label="Produce diagnostic plots?" help="This will produce the length bias (PWF) plot. If both sampling and wallenius methods are selected, it will also produce a plot comparing their p-values. These plots may help you compare the different p-value estimation methods that goseq can use. Default: No" />
111 <param name="rdata_out" type="boolean" checked="false" label="Output RData file?" help="Output all the data used by R to construct the tables and plots, can be loaded into R. Default: No" />
112 </section>
113
114 <!-- Advanced Options --> 121 <!-- Advanced Options -->
115 <section name="adv" title="Advanced Options"> 122 <section name="adv" title="Advanced Options">
116 <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction"> 123 <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction">
117 <option value="BH" selected="True">Benjamini-Hochberg [FDR] (1995)</option> 124 <option value="BH" selected="True">Benjamini-Hochberg [FDR] (1995)</option>
118 <option value="holm">Holm (1979)</option> 125 <option value="holm">Holm (1979)</option>
119 <option value="hommel">Hommel (1988)</option> 126 <option value="hommel">Hommel (1988)</option>
120 <option value="hochberg">Hochberg (1988)</option> 127 <option value="hochberg">Hochberg (1988)</option>
121 <option value="bonferroni">Bonferroni</option> 128 <option value="bonferroni">Bonferroni</option>
122 <option value="BY">Benjamini - Yekutieli (2001)</option> 129 <option value="BY">Benjamini - Yekutieli (2001)</option>
123 </param> 130 </param>
124 <param name="use_genes_without_cat" type="boolean" checked="false" label="Count genes without any category?" help="For example, a large number of genes may have no GO term annotated. If this option is set to No, those genes will be ignored in the calculation of p-values. If this option is set to Yes, then these genes will count towards the total number of genes outside the category being tested. This was the default behaviour for version 1.15.1 and earlier. Default: No"/> 131 <param name="use_genes_without_cat" type="boolean" checked="false" label="Count genes without any category?" help="For example, a large number of genes may have no GO term annotated. If this option is set to No, those genes will be ignored in the calculation of p-values. If this option is set to Yes, then these genes will count towards the total number of genes outside the category being tested. This was the default behaviour for version 1.15.1 and earlier"/>
132 </section>
133 <!-- Output Options -->
134 <section name="out" title="Output Options">
135 <param name="topgo_plot" type="boolean" checked="false" label="Output Top GO terms plot?" help="Output a PDF plot of the Top 10 over-represented GO terms" />
136 <param name="make_plots" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Produce diagnostic plots?" help="This will produce the length bias (PWF) plot. If both sampling and wallenius methods are selected, it will also produce a plot comparing their p-values. These plots may help you compare the different p-value estimation methods that goseq can use" />
137 <param name="cat_genes" type="boolean" checked="false" label="Extract the DE genes for the categories (GO/KEGG terms)?" help="" />
138 <param name="rdata_out" type="boolean" checked="false" label="Output RData file?" help="Output all the data used by R to construct the tables and plots, can be loaded into R" />
125 </section> 139 </section>
126 </inputs> 140 </inputs>
127
128 <outputs> 141 <outputs>
129 <data name="wallenius_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Wallenius method"> 142 <data name="wallenius_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Wallenius method">
130 <filter>methods['wallenius']</filter> 143 <filter>methods['wallenius']</filter>
131 </data> 144 </data>
132 <data name="sampling_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Sampling method"> 145 <data name="sampling_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Sampling method">
141 <data name="sample_vs_wallenius_plot" format="pdf" label="${tool.name} on ${on_string}: Sampling vs Wallenius P-values plot"> 154 <data name="sample_vs_wallenius_plot" format="pdf" label="${tool.name} on ${on_string}: Sampling vs Wallenius P-values plot">
142 <filter>methods['repcnt'] != 0</filter> 155 <filter>methods['repcnt'] != 0</filter>
143 <filter>methods['wallenius']</filter> 156 <filter>methods['wallenius']</filter>
144 <filter>out['make_plots']</filter> 157 <filter>out['make_plots']</filter>
145 </data> 158 </data>
146 <data name="rdata" format="rdata" from_work_dir="goseq_analysis.RData" label="${tool.name} on ${on_string}: RData file"> 159 <data name="top_plot" format="pdf" label="${tool.name} on ${on_string}: Top over-represented GO terms plot">
147 <filter>out['rdata_out']</filter>
148 </data>
149 <data name="top_plot" format="pdf" from_work_dir="top10.pdf" label="${tool.name} on ${on_string}: Top over-represented GO terms plot">
150 <filter>methods['wallenius']</filter> 160 <filter>methods['wallenius']</filter>
151 <filter>out['topgo_plot']</filter> 161 <filter>out['topgo_plot']</filter>
152 </data> 162 </data>
163 <data name="cat_genes_tab" format="tabular" label="${tool.name} on ${on_string}: DE genes for categories (GO/KEGG terms)">
164 <filter>out['cat_genes']</filter>
165 </data>
166 <data name="rdata" format="rdata" label="${tool.name} on ${on_string}: RData file">
167 <filter>out['rdata_out']</filter>
168 </data>
153 </outputs> 169 </outputs>
154
155 <tests> 170 <tests>
156 <!-- Ensure top plot is output --> 171 <!-- Ensure top plot is output and check Wallenius -->
157 <test expect_num_outputs="2"> 172 <test expect_num_outputs="2">
158 <param name="dge_file" value="dge_list.tab" ftype="tabular" /> 173 <param name="dge_file" value="dge_list.tab" ftype="tabular" />
159 <param name="length_file" value="gene_length.tab" ftype="tabular" /> 174 <param name="length_file" value="gene_length.tab" ftype="tabular" />
160 <param name="catSource" value="history" /> 175 <conditional name="categorySource">
161 <param name="category_file" value="category.tab" ftype="tabular" /> 176 <param name="catSource" value="history" />
162 <param name="use_genes_without_cat" value="true" /> 177 <param name="category_file" value="category.tab" ftype="tabular" />
163 <param name="topgo_plot" value="true" /> 178 </conditional>
179 <section name="methods">
180 <param name="wallenius" value="true"/>
181 <param name="hypergeometric" value="false"/>
182 <param name="repcnt" value="0"/>
183 </section>
184 <section name="adv">
185 <param name="p_adj_method" value="BH"/>
186 <param name="use_genes_without_cat" value="true" />
187 </section>
188 <section name="out">
189 <param name="topgo_plot" value="true"/>
190 <param name="make_plots" value="false"/>
191 <param name="cat_genes" value="false"/>
192 <param name="rdata_out" value="false"/>
193 </section>
164 <output name="top_plot" ftype="pdf" file="topgo.pdf" compare="sim_size"/> 194 <output name="top_plot" ftype="pdf" file="topgo.pdf" compare="sim_size"/>
165 </test>
166 <!-- Ensure Wallenius table is output -->
167 <test expect_num_outputs="1">
168 <param name="dge_file" value="dge_list.tab" ftype="tabular" />
169 <param name="length_file" value="gene_length.tab" ftype="tabular" />
170 <param name="catSource" value="history" />
171 <param name="category_file" value="category.tab" ftype="tabular" />
172 <param name="use_genes_without_cat" value="true" />
173 <output name="wallenius_tab"> 195 <output name="wallenius_tab">
174 <assert_contents> 196 <assert_contents>
175 <has_text_matching expression="category.*over_represented_pvalue.*under_represented_pvalue.*numDEInCat.*numInCat.*term.*ontology.*p.adjust.over_represented.*p.adjust.under_represented" /> 197 <has_text_matching expression="category.*over_represented_pvalue.*under_represented_pvalue.*numDEInCat.*numInCat.*term.*ontology.*p.adjust.over_represented.*p.adjust.under_represented" />
176 <has_text_matching expression="GO:0000278.*0.01" /> 198 <has_text_matching expression="GO:0000278.*0.01" />
177 </assert_contents> 199 </assert_contents>
178 </output> 200 </output>
179 </test> 201 </test>
180 <!-- Ensure getting GO categories works --> 202 <!-- Ensure getting GO categories works & also DE genes for GO terms-->
181 <test expect_num_outputs="1"> 203 <test expect_num_outputs="2">
182 <param name="dge_file" value="dge_list.tab" ftype="tabular"/> 204 <param name="dge_file" value="dge_list.tab" ftype="tabular"/>
183 <param name="length_file" value="gene_length.tab" ftype="tabular"/> 205 <param name="length_file" value="gene_length.tab" ftype="tabular"/>
184 <param name="catSource" value="getgo" /> 206 <conditional name="categorySource">
185 <param name="genome" value="hg38" /> 207 <param name="catSource" value="getgo" />
186 <param name="gene_id" value="ensGene" /> 208 <param name="genome" value="hg38" />
187 <param name="use_genes_without_cat" value="true" /> 209 <param name="gene_id" value="ensGene" />
210 <param name="fetchcats" value="GO:CC,GO:BP,GO:MF"/>
211 </conditional>
212 <section name="methods">
213 <param name="wallenius" value="true"/>
214 <param name="hypergeometric" value="false"/>
215 <param name="repcnt" value="0"/>
216 </section>
217 <section name="adv">
218 <param name="p_adj_method" value="BH"/>
219 <param name="use_genes_without_cat" value="true" />
220 </section>
221 <section name="out">
222 <param name="topgo_plot" value="false"/>
223 <param name="make_plots" value="false"/>
224 <param name="cat_genes" value="true"/>
225 <param name="rdata_out" value="false"/>
226 </section>
188 <output name="wallenius_tab"> 227 <output name="wallenius_tab">
189 <assert_contents> 228 <assert_contents>
190 <has_text_matching expression="category.*over_represented_pvalue.*under_represented_pvalue.*numDEInCat.*numInCat.*term.*ontology.*p.adjust.over_represented.*p.adjust.under_represented" /> 229 <has_text_matching expression="category.*over_represented_pvalue.*under_represented_pvalue.*numDEInCat.*numInCat.*term.*ontology.*p.adjust.over_represented.*p.adjust.under_represented" />
191 <has_text_matching expression="GO:0005576.*8.8" /> 230 <has_text_matching expression="GO:0005576.*9.0" />
231 </assert_contents>
232 </output>
233 <output name="cat_genes_tab">
234 <assert_contents>
235 <has_text_matching expression="Categories.*DEgenes" />
236 <has_text_matching expression="GO:0005615.*ENSG00000090402,ENSG00000108953,ENSG00000070961" />
192 </assert_contents> 237 </assert_contents>
193 </output> 238 </output>
194 </test> 239 </test>
195 <!-- Ensure getting GO categories for another genome (zebrafish) works --> 240 <!-- Ensure getting GO categories for another genome (zebrafish) works -->
196 <test expect_num_outputs="1"> 241 <test expect_num_outputs="1">
197 <param name="dge_file" value="dge_list_zf.tab" ftype="tabular"/> 242 <param name="dge_file" value="dge_list_zf.tab" ftype="tabular"/>
198 <param name="length_file" value="gene_length_zf.tab" ftype="tabular"/> 243 <param name="length_file" value="gene_length_zf.tab" ftype="tabular"/>
199 <param name="catSource" value="getgo" /> 244 <conditional name="categorySource">
200 <param name="genome" value="danRer10"/> 245 <param name="catSource" value="getgo" />
201 <param name="gene_id" value="ensGene" /> 246 <param name="genome" value="danRer10"/>
202 <param name="use_genes_without_cat" value="true" /> 247 <param name="gene_id" value="ensGene" />
248 <param name="fetchcats" value="GO:CC,GO:BP,GO:MF"/>
249 </conditional>
250 <section name="methods">
251 <param name="wallenius" value="true"/>
252 <param name="hypergeometric" value="false"/>
253 <param name="repcnt" value="0"/>
254 </section>
255 <section name="adv">
256 <param name="p_adj_method" value="BH"/>
257 <param name="use_genes_without_cat" value="true" />
258 </section>
259 <section name="out">
260 <param name="topgo_plot" value="false"/>
261 <param name="make_plots" value="false"/>
262 <param name="cat_genes" value="false"/>
263 <param name="rdata_out" value="false"/>
264 </section>
203 <output name="wallenius_tab"> 265 <output name="wallenius_tab">
204 <assert_contents> 266 <assert_contents>
205 <has_text_matching expression="category.*over_represented_pvalue.*under_represented_pvalue.*numDEInCat.*numInCat.*term.*ontology.*p.adjust.over_represented.*p.adjust.under_represented" /> 267 <has_text_matching expression="category.*over_represented_pvalue.*under_represented_pvalue.*numDEInCat.*numInCat.*term.*ontology.*p.adjust.over_represented.*p.adjust.under_represented" />
206 <has_text_matching expression="GO:0031324.*0.50" /> 268 <has_text_matching expression="GO:0016569.*0.8" />
207 </assert_contents> 269 </assert_contents>
208 </output> 270 </output>
209 </test> 271 </test>
210 <!-- Ensure length bias plot works --> 272 <!-- Ensure length bias plot works -->
211 <test expect_num_outputs="2"> 273 <test expect_num_outputs="2">
212 <param name="dge_file" value="dge_list.tab" ftype="tabular" /> 274 <param name="dge_file" value="dge_list.tab" ftype="tabular" />
213 <param name="length_file" value="gene_length.tab" ftype="tabular" /> 275 <param name="length_file" value="gene_length.tab" ftype="tabular" />
214 <param name="catSource" value="history" /> 276 <conditional name="categorySource">
215 <param name="category_file" value="category.tab" ftype="tabular" /> 277 <param name="catSource" value="history" />
278 <param name="category_file" value="category.tab" ftype="tabular" />
279 </conditional>
280 <section name="methods">
281 <param name="wallenius" value="true"/>
282 <param name="hypergeometric" value="false"/>
283 <param name="repcnt" value="0"/>
284 </section>
285 <section name="adv">
286 <param name="p_adj_method" value="BH"/>
287 <param name="use_genes_without_cat" value="true" />
288 </section>
289 <section name="out">
290 <param name="topgo_plot" value="false"/>
291 <param name="make_plots" value="true"/>
292 <param name="cat_genes" value="false"/>
293 <param name="rdata_out" value="false"/>
294 </section>
216 <param name="make_plots" value="true" /> 295 <param name="make_plots" value="true" />
217 <param name="use_genes_without_cat" value="true" />
218 <output name="length_bias_plot" ftype="pdf" file="length_bias_plot.pdf" compare="sim_size" /> 296 <output name="length_bias_plot" ftype="pdf" file="length_bias_plot.pdf" compare="sim_size" />
219 </test> 297 </test>
220 <!-- Ensure hypergeometric works --> 298 <!-- Ensure hypergeometric works -->
221 <test expect_num_outputs="2"> 299 <test expect_num_outputs="2">
222 <param name="dge_file" value="dge_list.tab" ftype="tabular" /> 300 <param name="dge_file" value="dge_list.tab" ftype="tabular" />
223 <param name="length_file" value="gene_length.tab" ftype="tabular" /> 301 <param name="length_file" value="gene_length.tab" ftype="tabular" />
224 <param name="catSource" value="history" /> 302 <conditional name="categorySource">
225 <param name="category_file" value="category.tab" ftype="tabular" /> 303 <param name="catSource" value="history" />
226 <param name="use_genes_without_cat" value="true" /> 304 <param name="category_file" value="category.tab" ftype="tabular" />
227 <param name="hypergeometric" value="true" /> 305 </conditional>
306 <section name="methods">
307 <param name="wallenius" value="true"/>
308 <param name="hypergeometric" value="true"/>
309 <param name="repcnt" value="0"/>
310 </section>
311 <section name="adv">
312 <param name="p_adj_method" value="BH"/>
313 <param name="use_genes_without_cat" value="true" />
314 </section>
315 <section name="out">
316 <param name="topgo_plot" value="false"/>
317 <param name="make_plots" value="false"/>
318 <param name="cat_genes" value="false"/>
319 <param name="rdata_out" value="false"/>
320 </section>
228 <output name="nobias_tab" file="nobias.tab" compare="contains" /> 321 <output name="nobias_tab" file="nobias.tab" compare="contains" />
229 </test> 322 </test>
230 <!-- Ensure sampling vs wallenius works --> 323 <!-- Ensure sampling vs wallenius works -->
231 <test expect_num_outputs="4"> 324 <test expect_num_outputs="4">
232 <param name="dge_file" value="dge_list.tab" ftype="tabular" /> 325 <param name="dge_file" value="dge_list.tab" ftype="tabular" />
233 <param name="length_file" value="gene_length.tab" ftype="tabular" /> 326 <param name="length_file" value="gene_length.tab" ftype="tabular" />
234 <param name="catSource" value="history" /> 327 <conditional name="categorySource">
235 <param name="category_file" value="category.tab" ftype="tabular" /> 328 <param name="catSource" value="history" />
236 <param name="use_genes_without_cat" value="true" /> 329 <param name="category_file" value="category.tab" ftype="tabular" />
237 <param name="make_plots" value="true" /> 330 </conditional>
238 <param name="repcnt" value="1000" /> 331 <section name="methods">
332 <param name="wallenius" value="true"/>
333 <param name="hypergeometric" value="false"/>
334 <param name="repcnt" value="1000"/>
335 </section>
336 <section name="adv">
337 <param name="p_adj_method" value="BH"/>
338 <param name="use_genes_without_cat" value="true" />
339 </section>
340 <section name="out">
341 <param name="topgo_plot" value="false"/>
342 <param name="make_plots" value="true"/>
343 <param name="cat_genes" value="false"/>
344 <param name="rdata_out" value="false"/>
345 </section>
239 <output name="sampling_tab" file="samp.tab" compare="sim_size" /> 346 <output name="sampling_tab" file="samp.tab" compare="sim_size" />
240 <output name="length_bias_plot" ftype="pdf" file="length_bias_plot.pdf" compare="sim_size" /> 347 <output name="length_bias_plot" ftype="pdf" file="length_bias_plot.pdf" compare="sim_size" />
241 <output name="sample_vs_wallenius_plot" ftype="pdf" file="sample_vs_wallenius_plot.pdf" compare="sim_size" /> 348 <output name="sample_vs_wallenius_plot" ftype="pdf" file="sample_vs_wallenius_plot.pdf" compare="sim_size" />
242 </test> 349 </test>
243 <!-- Ensure RData output works --> 350 <!-- Ensure RData output works -->
244 <test expect_num_outputs="2"> 351 <test expect_num_outputs="2">
245 <param name="dge_file" value="dge_list.tab" ftype="tabular" /> 352 <param name="dge_file" value="dge_list.tab" ftype="tabular" />
246 <param name="length_file" value="gene_length.tab" ftype="tabular" /> 353 <param name="length_file" value="gene_length.tab" ftype="tabular" />
247 <param name="catSource" value="history" /> 354 <conditional name="categorySource">
248 <param name="category_file" value="category.tab" ftype="tabular" /> 355 <param name="catSource" value="history" />
249 <param name="use_genes_without_cat" value="true" /> 356 <param name="category_file" value="category.tab" ftype="tabular" />
250 <param name="rdata_out" value="true" /> 357 </conditional>
358 <section name="methods">
359 <param name="wallenius" value="true"/>
360 <param name="hypergeometric" value="false"/>
361 <param name="repcnt" value="0"/>
362 </section>
363 <section name="adv">
364 <param name="p_adj_method" value="BH"/>
365 <param name="use_genes_without_cat" value="true" />
366 </section>
367 <section name="out">
368 <param name="topgo_plot" value="false"/>
369 <param name="make_plots" value="false"/>
370 <param name="cat_genes" value="false"/>
371 <param name="rdata_out" value="true"/>
372 </section>
251 <output name="rdata" file="goseq_analysis.RData" compare="sim_size" /> 373 <output name="rdata" file="goseq_analysis.RData" compare="sim_size" />
252 </test> 374 </test>
253 </tests> 375 </tests>
254
255 <help><![CDATA[ 376 <help><![CDATA[
256 377
257 .. class:: infomark 378 .. class:: infomark
258 379
259 **What it does** 380 **What it does**
310 431
311 ----- 432 -----
312 433
313 **Outputs** 434 **Outputs**
314 435
315 * This tool outputs a tabular file containing a ranked list of gene categories, similar to below. The default output is the Wallenius method table. If the Sampling and/or Hypergeometric methods are also selected, additional tables are produced. 436 This tool outputs a tabular file containing a ranked list of gene categories, similar to below. The default output is the Wallenius method table. If the Sampling and/or Hypergeometric methods are also selected, additional tables are produced.
316 * Optionally, this tool can also output a plot of the top 10 over-represented GO categories, some diagnostic plots and an RData file, see **Output Options** above.
317 437
318 Example: 438 Example:
319 439
320 =========== =============== ================ ============ ========== ======================================== ========== =================== ==================== 440 =========== =============== ================ ============ ========== ======================================== ========== =================== ====================
321 *category* *over_rep_pval* *under_rep_pval* *numDEInCat* *numInCat* *term* *ontology* *p.adjust.over_rep* *p.adjust.under_rep* 441 *category* *over_rep_pval* *under_rep_pval* *numDEInCat* *numInCat* *term* *ontology* *p.adjust.over_rep* *p.adjust.under_rep*
326 GO\:0044699 0.000279 0.999844 158 513 single-organism process BP 0.394825 1 446 GO\:0044699 0.000279 0.999844 158 513 single-organism process BP 0.394825 1
327 GO\:0065010 0.000428 0.999808 43 108 extracellular membrane-bounded organelle CC 0.394825 1 447 GO\:0065010 0.000428 0.999808 43 108 extracellular membrane-bounded organelle CC 0.394825 1
328 GO\:0070062 0.000428 0.999808 43 108 extracellular exosome CC 0.394825 1 448 GO\:0070062 0.000428 0.999808 43 108 extracellular exosome CC 0.394825 1
329 =========== =============== ================ ============ ========== ======================================== ========== =================== ==================== 449 =========== =============== ================ ============ ========== ======================================== ========== =================== ====================
330 450
451 Optionally, this tool can also output:
452 * a plot of the top 10 over-represented GO categories
453 * some diagnostic plots
454 * a tabular file with the differentially expressed genes in categories (GO/KEGG terms)
455 * an RData file
456
331 ----- 457 -----
332 458
333 **Method options** 459 **Method options**
334 460
335 3 methods, *Wallenius*, *Sampling* and *Hypergeometric*, can be used to calculate the p-values as follows. 461 3 methods, *Wallenius*, *Sampling* and *Hypergeometric*, can be used to calculate the p-values as follows.