Mercurial > repos > iuc > goseq
comparison goseq.xml @ 0:ade933eff007 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit b7dcd020c6a15fa55f392cc09cbc37580d6e75c4
author | iuc |
---|---|
date | Thu, 17 Nov 2016 16:40:19 -0500 |
parents | |
children | 9d1256d9ef0b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ade933eff007 |
---|---|
1 <tool id="goseq" name="goseq" version="0.2.2"> | |
2 <description>tests for overrepresented gene categories</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.3.2">r-optparse</requirement> | |
5 <requirement type="package" version="1.22.0">bioconductor-goseq</requirement> | |
6 </requirements> | |
7 <stdio> | |
8 <regex match="Execution halted" | |
9 source="both" | |
10 level="fatal" | |
11 description="Execution halted." /> | |
12 <regex match="Error in" | |
13 source="both" | |
14 level="fatal" | |
15 description="An undefined error occured, please check your input carefully and contact your administrator." /> | |
16 <regex match="Fatal error" | |
17 source="both" | |
18 level="fatal" | |
19 description="An undefined error occured, please check your input carefully and contact your administrator." /> | |
20 </stdio> | |
21 <command><![CDATA[ | |
22 Rscript '$__tool_directory__'/goseq.r --dge_file '$dge_file' | |
23 --length_file '$length_file' | |
24 --category_file '$category_file' | |
25 #if $methods['wallenius']: | |
26 --wallenius_tab '$wallenius_tab' | |
27 #end if | |
28 #if $methods['hypergeometric']: | |
29 --nobias_tab '$nobias_tab' | |
30 #end if | |
31 --repcnt '$methods.repcnt' | |
32 --sampling_tab '$sampling_tab' | |
33 --p_adj_method '$p_adj_method' | |
34 --use_genes_without_cat '$use_genes_without_cat' | |
35 --make_plots '$make_plots' | |
36 --length_bias_plot '$length_bias_plot' | |
37 --sample_vs_wallenius_plot '$sample_vs_wallenius_plot' | |
38 ]]></command> | |
39 <inputs> | |
40 <param name="dge_file" help="A tabular file with gene names in the first column, and TRUE or FALSE in the last column. TRUE means a gene is differentially expressed. See help section for details." label="Differentially expressed gene file" type="data" format="tabular" /> | |
41 <param name="length_file" label="Gene length file for length bias correction" help="You can calculate the gene length using the get length and gc content tool" type="data" format="tabular" /> | |
42 <param name="category_file" label="Gene category file" help="You can obtain a mapping of gene id to gene ontology using the getgo tool" type="data" format="tabular" /> | |
43 <param name="use_genes_without_cat" help="For example, a large number of gene may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested" | |
44 label="Count genes without any category?" type="boolean"/> | |
45 <section name="methods" title="Method options" expanded="True"> | |
46 <param name="wallenius" type="boolean" checked="true" label="Use wallenius method" help="See help for details" /> | |
47 <param name="hypergeometric" type="boolean" checked="false" label="Use hypergeometric method" help="Does not use gene length information. See help for details" /> | |
48 <param name="repcnt" help="Draw this many random control gene sets. Set to 0 to not do sampling. Larger values take a long time" label="sampling depth" size="3" type="integer" min="0" max="10000" value="0" /> | |
49 </section> | |
50 <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction"> | |
51 <option value="BH" selected="true">Benjamini-Hochberg [FDR] (1995)</option> | |
52 <option value="holm">Holm (1979)</option> | |
53 <option value="hommel">Hommel (1988)</option> | |
54 <option value="hochberg">Hochberg (1988)</option> | |
55 <option value="bonferroni">Bonferroni</option> | |
56 <option value="BY">Benjamini - Yekutieli (2001)</option> | |
57 </param> | |
58 <param help="These plots may help you compare the different p-value estimation methods that goseq can use." label="Produce diagnostic plots?" name="make_plots" type="boolean"></param> | |
59 </inputs> | |
60 <outputs> | |
61 <data name="length_bias_plot" format="pdf" label="length bias plot"> | |
62 <filter>make_plots</filter> | |
63 <filter>methods['hypergeometric']</filter> | |
64 </data> | |
65 <data name="sample_vs_wallenius_plot" format="pdf" label="Plot P-value from sampling against wallenius distribution"> | |
66 <filter>methods['repcnt'] != 0</filter> | |
67 <filter>methods['wallenius']</filter> | |
68 <filter>make_plots</filter> | |
69 </data> | |
70 <data name="nobias_tab" format="tabular" label="Ranked category list - no length bias correction"> | |
71 <filter>methods['hypergeometric']</filter> | |
72 </data> | |
73 <data name="sampling_tab" format="tabular" label="Ranked category list - sampling"> | |
74 <filter>methods['repcnt'] != 0</filter> | |
75 </data> | |
76 <data name="wallenius_tab" format="tabular" label="Ranked category list - wallenius method"> | |
77 <filter>methods['wallenius']</filter> | |
78 </data> | |
79 </outputs> | |
80 <tests> | |
81 <test> | |
82 <param name="dge_file" value="dge_list.tab" ftype="tabular"/> | |
83 <param name="length_file" value="gene_length.tab" ftype="tabular"/> | |
84 <param name="category_file" value="category.tab" ftype="tabular"/> | |
85 <param name="use_genes_without_cat" value="true" /> | |
86 <output name="wallenius_tab" file="wal.tab" compare="re_match"/> | |
87 </test> | |
88 </tests> | |
89 <help> | |
90 | |
91 **What it does** | |
92 | |
93 Detects Gene Ontology and/or other user defined categories which are over/under-represented in RNA-seq data. | |
94 | |
95 Options map closely to the excellent manual_ | |
96 | |
97 | |
98 **Input files** | |
99 | |
100 *DGE list:* | |
101 goseq needs a tabular file with genes in the first column, and TRUE or FALSE in the last column. | |
102 TRUE means the gene should count as differentially expressed, FALSE means it is not differentially expressed. | |
103 You can use the "Compute an expression on every row" tool to create a TRUE / FALSE column for your dataset. | |
104 | |
105 *Gene length file:* | |
106 goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes | |
107 using a probability weighting function (PWF). | |
108 The format of this file is tabular, with gene_id in the first column and length in the second column. | |
109 The "get length and gc content" tool can produce such a file. | |
110 | |
111 *Gene category file:* | |
112 You will also need a file describing the membership of genes in categories. The format of this file is gene_id in the first column, | |
113 category name in the second column. If you are interested in gene ontology categories you can use the getgo tool to retrieve | |
114 gene ontologies for model organisms, or you can construct your own file. | |
115 | |
116 **Method options** | |
117 | |
118 3 methods, "Wallenius", "Sampling" and "Hypergeometric", can be used to calculate the p-values as follows. | |
119 | |
120 *"Wallenius"* approximates the true distribution of numbers of members of a category amongst DE genes by the Wallenius non-central hypergeometric distribution. | |
121 This distribution assumes that within a category all genes have the same probability of being chosen. | |
122 Therefore, this approximation works best when the range in probabilities obtained by the probability weighting function is small. | |
123 | |
124 *"Sampling"* uses random sampling to approximate the true distribution and uses it to calculate the p-values for over (and under) representation of categories. | |
125 Although this is the most accurate method given a high enough value of sampling depth, its use quickly becomes computationally prohibitive. | |
126 | |
127 *"Hypergeometric"* assumes there is no bias in power to detect differential expression at all and calculates the p-values using a standard hypergeometric distribution. | |
128 Useful if you wish to test the effect of selection bias on your results. | |
129 | |
130 CAUTION: "Hypergeometric" should NEVER be used for producing results for biological interpretation. | |
131 If there is genuinely no bias in power to detect DE in your experiment, the PWF will reflect this and the other methods will produce accurate results. | |
132 | |
133 .. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf | |
134 | |
135 | |
136 </help> | |
137 <citations> | |
138 <citation type="doi">10.1186/gb-2010-11-2-r14</citation> | |
139 </citations> | |
140 </tool> |