diff goseq.xml @ 0:ade933eff007 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit b7dcd020c6a15fa55f392cc09cbc37580d6e75c4
author iuc
date Thu, 17 Nov 2016 16:40:19 -0500
parents
children 9d1256d9ef0b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/goseq.xml	Thu Nov 17 16:40:19 2016 -0500
@@ -0,0 +1,140 @@
+<tool id="goseq" name="goseq" version="0.2.2">
+    <description>tests for overrepresented gene categories</description>
+    <requirements>
+        <requirement type="package" version="1.3.2">r-optparse</requirement>
+        <requirement type="package" version="1.22.0">bioconductor-goseq</requirement>
+    </requirements>
+    <stdio>
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occured, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occured, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command><![CDATA[
+        Rscript '$__tool_directory__'/goseq.r --dge_file '$dge_file'
+        --length_file '$length_file'
+        --category_file '$category_file'
+        #if $methods['wallenius']:
+        --wallenius_tab '$wallenius_tab'
+        #end if
+        #if $methods['hypergeometric']:
+        --nobias_tab 'nobias_tab'
+        #end if
+        --repcnt '$methods.repcnt'
+        --sampling_tab '$sampling_tab'
+        --p_adj_method '$p_adj_method'
+        --use_genes_without_cat '$use_genes_without_cat'
+        --make_plots '$make_plots'
+        --length_bias_plot '$length_bias_plot'
+        --sample_vs_wallenius_plot '$sample_vs_wallenius_plot'
+    ]]></command>
+    <inputs>
+        <param name="dge_file" help="A tabular file with gene names in the first column, and TRUE or FALSE in the last column. TRUE means a gene is differentially expressed. See help section for details." label="Differentially expressed gene file" type="data" format="tabular" />
+        <param name="length_file" label="Gene length file for length bias correction" help="You can calculate the gene length using the get length and gc content tool" type="data" format="tabular" />
+        <param name="category_file" label="Gene category file" help="You can obtain a mapping of gene id to gene ontology using the getgo tool" type="data" format="tabular" />
+        <param name="use_genes_without_cat" help="For example, a large number of gene may have no GO term annotated. If this option is set to FALSE, those genes will be ignored in the calculation of p-values. If this option is set to TRUE, then these genes will count towards the total number of genes outside the category being tested"
+               label="Count genes without any category?" type="boolean"/>
+        <section name="methods" title="Method options" expanded="True">
+            <param name="wallenius" type="boolean" checked="true" label="Use wallenius method" help="See help for details" />
+            <param name="hypergeometric" type="boolean" checked="false" label="Use hypergeometric method" help="Does not use gene length information. See help for details" />
+            <param name="repcnt" help="Draw this many random control gene sets. Set to 0 to not do sampling. Larger values take a long time" label="sampling depth" size="3" type="integer" min="0" max="10000" value="0" />
+        </section>
+        <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction">
+            <option value="BH" selected="true">Benjamini-Hochberg [FDR] (1995)</option>
+            <option value="holm">Holm (1979)</option>
+            <option value="hommel">Hommel (1988)</option>
+            <option value="hochberg">Hochberg (1988)</option>
+            <option value="bonferroni">Bonferroni</option>
+            <option value="BY">Benjamini - Yekutieli (2001)</option>
+        </param>
+        <param help="These plots may help you compare the different p-value estimation methods that goseq can use." label="Produce diagnostic plots?" name="make_plots" type="boolean"></param>
+    </inputs>
+    <outputs>
+        <data name="length_bias_plot" format="pdf" label="length bias plot">
+            <filter>make_plots</filter>
+            <filter>methods['hypergeometric']</filter>
+        </data>
+        <data name="sample_vs_wallenius_plot" format="pdf" label="Plot P-value from sampling against wallenius distribution">
+            <filter>methods['repcnt'] != 0</filter>
+            <filter>methods['wallenius']</filter>
+            <filter>make_plots</filter>
+        </data>
+        <data name="nobias_tab" format="tabular" label="Ranked category list - no length bias correction">
+            <filter>methods['hypergeometric']</filter>
+        </data>
+        <data name="sampling_tab" format="tabular" label="Ranked category list - sampling">
+            <filter>methods['repcnt'] != 0</filter>
+        </data>
+        <data name="wallenius_tab" format="tabular" label="Ranked category list - wallenius method">
+            <filter>methods['wallenius']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="dge_file" value="dge_list.tab" ftype="tabular"/>
+            <param name="length_file" value="gene_length.tab" ftype="tabular"/>
+            <param name="category_file" value="category.tab" ftype="tabular"/>
+            <param name="use_genes_without_cat" value="true" />
+            <output name="wallenius_tab" file="wal.tab" compare="re_match"/>
+        </test>
+    </tests>
+    <help>
+
+        **What it does**
+
+        Detects Gene Ontology and/or other user defined categories which are over/under-represented in RNA-seq data.
+
+        Options map closely to the excellent manual_
+
+
+        **Input files**
+
+        *DGE list:*
+        goseq needs a tabular file with genes in the first column, and TRUE or FALSE in the last column.
+        TRUE means the gene should count as differentially expressed, FALSE means it is not differentially expressed.
+        You can use the "Compute an expression on every row" tool to create a TRUE / FALSE column for your dataset.
+
+        *Gene length file:*
+        goseq needs information about the length of a gene to correct for potential length bias in differentially expressed genes
+        using a prodbability weight function (PWF).
+        The format of this file is tabular, with gene_id in the first column and length in the second column.
+        The "get length and gc content" tool can produce such a file.
+
+        *Gene category file:*
+        You will also need a file describing the membership of genes in categories. The format of this file is gene_id in the first column,
+        category name in the second column. If you are interested in gene ontology categories you can use the getgo file to retrive
+        gene ontologies for model organisms, or you can construct your own file.
+
+        **Method options**
+
+        3 methods, "Wallenius", "Sampling" and "Hypergeometric", can be used to calculate the p-values as follows.
+
+        *"Wallenius"* approximates the true distribution of numbers of members of a category amongst DE genes by the Wallenius non-central hypergeometric distribution.
+        This distribution assumes that within a category all genes have the same probability of being chosen.
+        Therefore, this approximation works best when the range in probabilities obtained by the probability weighting function is small.
+
+        *"Sampling"* uses random sampling to approximate the true distribution and uses it to calculate the p-values for over (and under) representation of categories.
+        Although this is the most accurate method given a high enough value of sampling depth, its use quickly becomes computationally prohibitive.
+
+        *"Hypergeometric"* assumes there is no bias in power to detect differential expression at all and calculates the p-values using a standard hypergeometric distribution.
+        Useful if you wish to test the effect of selection bias on your results.
+
+        CAUTION:  "Hypergeometric" should NEVER be used for producing results for biological interpretation.
+        If there is genuinely no bias in power to detect DE in your experiment, the PWF will reflect this and the other methods will produce accuracte results.
+
+        .. _manual: https://bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
+
+
+    </help>
+    <citations>
+        <citation type="doi">10.1186/gb-2010-11-2-r14</citation>
+    </citations>
+</tool>