comparison goseq.xml @ 0:ade933eff007 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit b7dcd020c6a15fa55f392cc09cbc37580d6e75c4
author iuc
date Thu, 17 Nov 2016 16:40:19 -0500
parents
children 9d1256d9ef0b
comparison
equal deleted inserted replaced
-1:000000000000 0:ade933eff007
1 <tool id="goseq" name="goseq" version="0.2.2">
2 <description>tests for overrepresented gene categories</description>
3 <requirements>
4 <requirement type="package" version="1.3.2">r-optparse</requirement>
5 <requirement type="package" version="1.22.0">bioconductor-goseq</requirement>
6 </requirements>
7 <stdio>
8 <regex match="Execution halted"
9 source="both"
10 level="fatal"
11 description="Execution halted." />
12 <regex match="Error in"
13 source="both"
14 level="fatal"
15 description="An undefined error occured, please check your input carefully and contact your administrator." />
16 <regex match="Fatal error"
17 source="both"
18 level="fatal"
19 description="An undefined error occured, please check your input carefully and contact your administrator." />
20 </stdio>
21 <command><![CDATA[
22 Rscript '$__tool_directory__'/goseq.r --dge_file '$dge_file'
23 --length_file '$length_file'
24 --category_file '$category_file'
25 #if $methods['wallenius']:
26 --wallenius_tab '$wallenius_tab'
27 #end if
28 #if $methods['hypergeometric']:
29 --nobias_tab '$nobias_tab'
30 #end if
31 --repcnt '$methods.repcnt'
32 --sampling_tab '$sampling_tab'
33 --p_adj_method '$p_adj_method'
34 --use_genes_without_cat '$use_genes_without_cat'
35 --make_plots '$make_plots'
36 --length_bias_plot '$length_bias_plot'
37 --sample_vs_wallenius_plot '$sample_vs_wallenius_plot'
38 ]]></command>
39 <inputs>
40 <param name="dge_file" help="A tabular file with gene names in the first column, and TRUE or FALSE in the last column. TRUE means a gene is differentially expressed. See help section for details." label="Differentially expressed gene file" type="data" format="tabular" />
41 <param name="length_file" label="Gene length file for length bias correction" help="You can calculate the gene length using the get length and gc content tool" type="data" format="tabular" />
42 <param name="category_file" label="Gene category file" help="You can obtain a mapping of gene id to gene ontology using the getgo tool" type="data" format="tabular" />
43 <param name="use_genes_without_cat" help="For example, a large number of genes may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested."
44 label="Count genes without any category?" type="boolean"/>
45 <section name="methods" title="Method options" expanded="True">
46 <param name="wallenius" type="boolean" checked="true" label="Use wallenius method" help="See help for details" />
47 <param name="hypergeometric" type="boolean" checked="false" label="Use hypergeometric method" help="Does not use gene length information. See help for details" />
48 <param name="repcnt" help="Draw this many random control gene sets. Set to 0 to not do sampling. Larger values take a long time" label="sampling depth" size="3" type="integer" min="0" max="10000" value="0" />
49 </section>
50 <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction">
51 <option value="BH" selected="true">Benjamini-Hochberg [FDR] (1995)</option>
52 <option value="holm">Holm (1979)</option>
53 <option value="hommel">Hommel (1988)</option>
54 <option value="hochberg">Hochberg (1988)</option>
55 <option value="bonferroni">Bonferroni</option>
56 <option value="BY">Benjamini - Yekutieli (2001)</option>
57 </param>
58 <param help="These plots may help you compare the different p-value estimation methods that goseq can use." label="Produce diagnostic plots?" name="make_plots" type="boolean"></param>
59 </inputs>
60 <outputs>
61 <data name="length_bias_plot" format="pdf" label="length bias plot">
62 <filter>make_plots</filter>
63 <filter>methods['hypergeometric']</filter>
64 </data>
65 <data name="sample_vs_wallenius_plot" format="pdf" label="Plot P-value from sampling against wallenius distribution">
66 <filter>methods['repcnt'] != 0</filter>
67 <filter>methods['wallenius']</filter>
68 <filter>make_plots</filter>
69 </data>
70 <data name="nobias_tab" format="tabular" label="Ranked category list - no length bias correction">
71 <filter>methods['hypergeometric']</filter>
72 </data>
73 <data name="sampling_tab" format="tabular" label="Ranked category list - sampling">
74 <filter>methods['repcnt'] != 0</filter>
75 </data>
76 <data name="wallenius_tab" format="tabular" label="Ranked category list - wallenius method">
77 <filter>methods['wallenius']</filter>
78 </data>
79 </outputs>
80 <tests>
81 <test>
82 <param name="dge_file" value="dge_list.tab" ftype="tabular"/>
83 <param name="length_file" value="gene_length.tab" ftype="tabular"/>
84 <param name="category_file" value="category.tab" ftype="tabular"/>
85 <param name="use_genes_without_cat" value="true" />
86 <output name="wallenius_tab" file="wal.tab" compare="re_match"/>
87 </test>
88 </tests>
89 <help>
90
91 **What it does**
92
93 Detects Gene Ontology and/or other user defined categories which are over/under-represented in RNA-seq data.
94
95 Options map closely to the excellent manual_
96
97
98 **Input files**
99
100 *DGE list:*
101 goseq needs a tabular file with genes in the first column, and TRUE or FALSE in the last column.
102 TRUE means the gene should count as differentially expressed, FALSE means it is not differentially expressed.
103 You can use the "Compute an expression on every row" tool to create a TRUE / FALSE column for your dataset.
104
105 *Gene length file:*
106 goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes
107 using a probability weighting function (PWF).
108 The format of this file is tabular, with gene_id in the first column and length in the second column.
109 The "get length and gc content" tool can produce such a file.
110
111 *Gene category file:*
112 You will also need a file describing the membership of genes in categories. The format of this file is gene_id in the first column,
113 category name in the second column. If you are interested in gene ontology categories you can use the getgo tool to retrieve
114 gene ontologies for model organisms, or you can construct your own file.
115
116 **Method options**
117
118 3 methods, "Wallenius", "Sampling" and "Hypergeometric", can be used to calculate the p-values as follows.
119
120 *"Wallenius"* approximates the true distribution of numbers of members of a category amongst DE genes by the Wallenius non-central hypergeometric distribution.
121 This distribution assumes that within a category all genes have the same probability of being chosen.
122 Therefore, this approximation works best when the range in probabilities obtained by the probability weighting function is small.
123
124 *"Sampling"* uses random sampling to approximate the true distribution and uses it to calculate the p-values for over (and under) representation of categories.
125 Although this is the most accurate method given a high enough value of sampling depth, its use quickly becomes computationally prohibitive.
126
127 *"Hypergeometric"* assumes there is no bias in power to detect differential expression at all and calculates the p-values using a standard hypergeometric distribution.
128 Useful if you wish to test the effect of selection bias on your results.
129
130 CAUTION: "Hypergeometric" should NEVER be used for producing results for biological interpretation.
131 If there is genuinely no bias in power to detect DE in your experiment, the PWF will reflect this and the other methods will produce accurate results.
132
133 .. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
134
135
136 </help>
137 <citations>
138 <citation type="doi">10.1186/gb-2010-11-2-r14</citation>
139 </citations>
140 </tool>