diff goseq.xml @ 8:8b3e3657034e draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit 8e19f8bcaea6f607a1eaa14bb88f2d625ed63df0"
author iuc
date Fri, 06 Sep 2019 07:50:46 -0400
parents 67c29afac85f
children ef2ad746b589
line wrap: on
line diff
--- a/goseq.xml	Sun Mar 17 10:27:17 2019 -0400
+++ b/goseq.xml	Fri Sep 06 07:50:46 2019 -0400
@@ -1,14 +1,18 @@
-<tool id="goseq" name="goseq" version="1.34.0+galaxy1">
+<tool id="goseq" name="goseq" version="@VERSION@+@GALAXY_VERSION@">
     <description>tests for overrepresented gene categories</description>
+    <macros>
+        <token name="@VERSION@">1.36.0</token>
+        <token name="@GALAXY_VERSION@">galaxy0</token>
+    </macros>
     <requirements>
-        <requirement type="package" version="1.34.0">bioconductor-goseq</requirement>
-        <requirement type="package" version="3.7.0">bioconductor-org.hs.eg.db</requirement>
-        <requirement type="package" version="3.7.0">bioconductor-org.dm.eg.db</requirement>
-        <requirement type="package" version="3.7.0">bioconductor-org.dr.eg.db</requirement>
-        <requirement type="package" version="3.7.0">bioconductor-org.mm.eg.db</requirement>
-        <requirement type="package" version="0.7.8">r-dplyr</requirement>
-        <requirement type="package" version="3.1.0">r-ggplot2</requirement>
-        <requirement type="package" version="1.6.0">r-optparse</requirement>
+        <requirement type="package" version="@VERSION@">bioconductor-goseq</requirement>
+        <requirement type="package" version="3.8.2">bioconductor-org.hs.eg.db</requirement>
+        <requirement type="package" version="3.8.2">bioconductor-org.dm.eg.db</requirement>
+        <requirement type="package" version="3.8.2">bioconductor-org.dr.eg.db</requirement>
+        <requirement type="package" version="3.8.2">bioconductor-org.mm.eg.db</requirement>
+        <requirement type="package" version="0.8.3">r-dplyr</requirement>
+        <requirement type="package" version="3.2.1">r-ggplot2</requirement>
+        <requirement type="package" version="1.6.2">r-optparse</requirement>
     </requirements>
     <stdio>
         <regex match="Execution halted"
@@ -30,42 +34,54 @@
     <command><![CDATA[
 Rscript '$__tool_directory__/goseq.r'
 
---dge_file '$dge_file'
---length_file '$length_file'
+    --dge_file '$dge_file'
+    --length_file '$length_file'
 
-#if $categorySource.catSource == 'getgo':
+#if $categorySource.catSource == 'getgo'
     --genome $categorySource.genome
     --gene_id $categorySource.gene_id
     --fetch_cats '$categorySource.fetchcats'
-#elif $categorySource.catSource == 'history':
+#elif $categorySource.catSource == 'history'
     --category_file '$categorySource.category_file'
 #end if
 
-#if $methods['wallenius']:
+#if $methods.wallenius
     --wallenius_tab '$wallenius_tab'
 #end if
-#if $methods['hypergeometric']:
+#if $methods.hypergeometric
     --nobias_tab '$nobias_tab'
 #end if
---repcnt '$methods.repcnt'
---sampling_tab '$sampling_tab'
 
---make_plots '$out.make_plots'
---length_bias_plot '$length_bias_plot'
---sample_vs_wallenius_plot '$sample_vs_wallenius_plot'
+    --repcnt $methods.repcnt
+#if $methods.repcnt != 0
+    --sampling_tab '$sampling_tab'
+#end if
 
---rdata '$out.rdata_out'
---p_adj_method '$adv.p_adj_method'
---use_genes_without_cat '$adv.use_genes_without_cat'
+    --p_adj_method '$adv.p_adj_method'
+    --use_genes_without_cat '$adv.use_genes_without_cat'
 
-#if $out.topgo_plot:
-    --top_plot '$out.topgo_plot'
+#if $out.topgo_plot
+    --top_plot '$top_plot'
 #end if
 
-    ]]></command>
+#if str($out.make_plots) == 'TRUE'
+    --make_plots '$out.make_plots'
+    --length_bias_plot '$length_bias_plot'
+    #if $methods.repcnt != 0 and $methods.wallenius
+    --sample_vs_wallenius_plot '$sample_vs_wallenius_plot'
+    #end if
+#end if
 
-    <!-- Input Files-->
+#if $out.cat_genes
+    --categories_genes_out_fp '$cat_genes_tab'
+#end if
+
+#if $out.rdata_out
+    --rdata '$rdata'
+#end if
+    ]]></command>
     <inputs>
+        <!-- Input Files-->
         <param name="dge_file" type="data" format="tabular" label="Differentially expressed genes file" help="A tabular file with Gene IDs in the first column, and True or False in the second column. True means a gene is differentially expressed. See Help section for details."/>
         <param name="length_file" type="data" format="tabular" label="Gene lengths file" help="You can calculate the gene lengths using featureCounts or the Gene length and GC content tool."/>
         <conditional name="categorySource">
@@ -96,21 +112,12 @@
                 <param name="category_file" type="data" format="tabular" label="Gene category file"/>
             </when>
         </conditional>
-
         <!-- Method Options -->
         <section name="methods" title="Method Options">
-            <param name="wallenius" type="boolean" checked="true" label="Use Wallenius method" help="See help for details. Default: Yes" />
-            <param name="hypergeometric" type="boolean" checked="false" label="Use Hypergeometric method" help="Does not use gene length information. See help for details. Default: No" />
-            <param name="repcnt" type="integer" size="3" min="0" max="10000" value="0" label="Sampling number" help="Number of random samples to be calculated when sampling is used. Set to 0 to not do sampling. Larger values take a long time. Default: 0" />
+            <param name="wallenius" type="boolean" checked="true" label="Use Wallenius method" help="See help for details" />
+            <param name="hypergeometric" type="boolean" checked="false" label="Use Hypergeometric method" help="Does not use gene length information. See help for details" />
+            <param name="repcnt" type="integer" size="3" min="0" max="10000" value="0" label="Sampling number" help="Number of random samples to be calculated when sampling is used. Set to 0 to not do sampling. Larger values take a long time" />
         </section>
-
-        <!-- Output Options -->
-        <section name="out" title="Output Options">
-            <param name="topgo_plot" type="boolean" checked="false" label="Output Top GO terms plot?" help="Output a PDF plot of the Top 10 over-represented GO terms. Default: No" />
-            <param name="make_plots" type="boolean" checked="false" label="Produce diagnostic plots?" help="This will produce the length bias (PWF) plot. If both sampling and wallenius methods are selected, it will also produce a plot comparing their p-values. These plots may help you compare the different p-value estimation methods that goseq can use. Default: No" />
-            <param name="rdata_out" type="boolean" checked="false" label="Output RData file?" help="Output all the data used by R to construct the tables and plots, can be loaded into R. Default: No" />
-        </section>
-
         <!-- Advanced Options -->
         <section name="adv" title="Advanced Options">
             <param name="p_adj_method" type="select" label="Select a method for multiple hypothesis testing correction">
@@ -121,10 +128,16 @@
                 <option value="bonferroni">Bonferroni</option>
                 <option value="BY">Benjamini - Yekutieli (2001)</option>
             </param>
-            <param name="use_genes_without_cat" type="boolean" checked="false" label="Count genes without any category?" help="For example, a large number of genes may have no GO term annotated. If this option is set to No, those genes will be ignored in the calculation of p-values. If this option is set to Yes, then these genes will count towards the total number of genes outside the category being tested. This was the default behaviour for version 1.15.1 and earlier. Default: No"/>
+            <param name="use_genes_without_cat" type="boolean" checked="false" label="Count genes without any category?" help="For example, a large number of genes may have no GO term annotated. If this option is set to No, those genes will be ignored in the calculation of p-values. If this option is set to Yes, then these genes will count towards the total number of genes outside the category being tested. This was the default behaviour for version 1.15.1 and earlier"/>
+        </section>
+        <!-- Output Options -->
+        <section name="out" title="Output Options">
+            <param name="topgo_plot" type="boolean" checked="false" label="Output Top GO terms plot?" help="Output a PDF plot of the Top 10 over-represented GO terms" />
+            <param name="make_plots" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Produce diagnostic plots?" help="This will produce the length bias (PWF) plot. If both sampling and wallenius methods are selected, it will also produce a plot comparing their p-values. These plots may help you compare the different p-value estimation methods that goseq can use" />
+            <param name="cat_genes" type="boolean" checked="false" label="Extract the DE genes for the categories (GO/KEGG terms)?" help="" />
+            <param name="rdata_out" type="boolean" checked="false" label="Output RData file?" help="Output all the data used by R to construct the tables and plots, can be loaded into R" />
         </section>
     </inputs>
-
     <outputs>
         <data name="wallenius_tab" format="tabular" label="${tool.name} on ${on_string}: Ranked category list - Wallenius method">
             <filter>methods['wallenius']</filter>
@@ -143,33 +156,42 @@
             <filter>methods['wallenius']</filter>
             <filter>out['make_plots']</filter>
         </data>
-        <data name="rdata" format="rdata" from_work_dir="goseq_analysis.RData" label="${tool.name} on ${on_string}: RData file">
-            <filter>out['rdata_out']</filter>
-        </data>
-        <data name="top_plot" format="pdf" from_work_dir="top10.pdf" label="${tool.name} on ${on_string}: Top over-represented GO terms plot">
+        <data name="top_plot" format="pdf" label="${tool.name} on ${on_string}: Top over-represented GO terms plot">
             <filter>methods['wallenius']</filter>
             <filter>out['topgo_plot']</filter>
         </data>
+        <data name="cat_genes_tab" format="tabular" label="${tool.name} on ${on_string}: DE genes for categories (GO/KEGG terms)">
+            <filter>out['cat_genes']</filter>
+        </data>
+        <data name="rdata" format="rdata" label="${tool.name} on ${on_string}: RData file">
+            <filter>out['rdata_out']</filter>
+        </data>
     </outputs>
-
     <tests>
-        <!-- Ensure top plot is output -->
+        <!-- Ensure top plot is output and check Wallenius -->
         <test expect_num_outputs="2">
             <param name="dge_file" value="dge_list.tab" ftype="tabular" />
             <param name="length_file" value="gene_length.tab" ftype="tabular" />
-            <param name="catSource" value="history" />
-            <param name="category_file" value="category.tab" ftype="tabular" />
-            <param name="use_genes_without_cat" value="true" />
-            <param name="topgo_plot" value="true" />
+            <conditional name="categorySource">
+                <param name="catSource" value="history" />
+                <param name="category_file" value="category.tab" ftype="tabular" />
+            </conditional>
+            <section name="methods">
+                <param name="wallenius" value="true"/>
+                <param name="hypergeometric" value="false"/>
+                <param name="repcnt" value="0"/>
+            </section>
+            <section name="adv">
+                <param name="p_adj_method" value="BH"/>
+                <param name="use_genes_without_cat" value="true" />
+            </section>
+            <section name="out">
+                <param name="topgo_plot" value="true"/>
+                <param name="make_plots" value="false"/>
+                <param name="cat_genes" value="false"/>
+                <param name="rdata_out" value="false"/>
+            </section>
             <output name="top_plot" ftype="pdf" file="topgo.pdf" compare="sim_size"/>
-        </test>
-        <!-- Ensure Wallenius table is output -->
-        <test expect_num_outputs="1">
-            <param name="dge_file" value="dge_list.tab" ftype="tabular" />
-            <param name="length_file" value="gene_length.tab" ftype="tabular" />
-            <param name="catSource" value="history" />
-            <param name="category_file" value="category.tab" ftype="tabular" />
-            <param name="use_genes_without_cat" value="true" />
             <output name="wallenius_tab">
                 <assert_contents>
                     <has_text_matching expression="category.*over_represented_pvalue.*under_represented_pvalue.*numDEInCat.*numInCat.*term.*ontology.*p.adjust.over_represented.*p.adjust.under_represented" />
@@ -177,18 +199,41 @@
                 </assert_contents>
             </output>
         </test>
-        <!-- Ensure getting GO categories works -->
-        <test expect_num_outputs="1">
+        <!-- Ensure getting GO categories works and that the DE genes for GO terms are output -->
+        <test expect_num_outputs="2">
             <param name="dge_file" value="dge_list.tab" ftype="tabular"/>
             <param name="length_file" value="gene_length.tab" ftype="tabular"/>
-            <param name="catSource" value="getgo" />
-            <param name="genome" value="hg38" />
-            <param name="gene_id" value="ensGene" />
-            <param name="use_genes_without_cat" value="true" />
+            <conditional name="categorySource">
+                <param name="catSource" value="getgo" />
+                <param name="genome" value="hg38" />
+                <param name="gene_id" value="ensGene" />
+                <param name="fetchcats" value="GO:CC,GO:BP,GO:MF"/>
+            </conditional>
+            <section name="methods">
+                <param name="wallenius" value="true"/>
+                <param name="hypergeometric" value="false"/>
+                <param name="repcnt" value="0"/>
+            </section>
+            <section name="adv">
+                <param name="p_adj_method" value="BH"/>
+                <param name="use_genes_without_cat" value="true" />
+            </section>
+            <section name="out">
+                <param name="topgo_plot" value="false"/>
+                <param name="make_plots" value="false"/>
+                <param name="cat_genes" value="true"/>
+                <param name="rdata_out" value="false"/>
+            </section>
             <output name="wallenius_tab">
                 <assert_contents>
                     <has_text_matching expression="category.*over_represented_pvalue.*under_represented_pvalue.*numDEInCat.*numInCat.*term.*ontology.*p.adjust.over_represented.*p.adjust.under_represented" />
-                    <has_text_matching expression="GO:0005576.*8.8" />
+                    <has_text_matching expression="GO:0005576.*9.0" />
+                </assert_contents>
+            </output>
+            <output name="cat_genes_tab">
+                <assert_contents>
+                    <has_text_matching expression="Categories.*DEgenes" />
+                    <has_text_matching expression="GO:0005615.*ENSG00000090402,ENSG00000108953,ENSG00000070961" />
                 </assert_contents>
             </output>
         </test>
@@ -196,14 +241,31 @@
         <test expect_num_outputs="1">
             <param name="dge_file" value="dge_list_zf.tab" ftype="tabular"/>
             <param name="length_file" value="gene_length_zf.tab" ftype="tabular"/>
-            <param name="catSource" value="getgo" />
-            <param name="genome" value="danRer10"/>
-            <param name="gene_id" value="ensGene" />
-            <param name="use_genes_without_cat" value="true" />
+            <conditional name="categorySource">
+                <param name="catSource" value="getgo" />
+                <param name="genome" value="danRer10"/>
+                <param name="gene_id" value="ensGene" />
+                <param name="fetchcats" value="GO:CC,GO:BP,GO:MF"/>
+            </conditional>
+            <section name="methods">
+                <param name="wallenius" value="true"/>
+                <param name="hypergeometric" value="false"/>
+                <param name="repcnt" value="0"/>
+            </section>
+            <section name="adv">
+                <param name="p_adj_method" value="BH"/>
+                <param name="use_genes_without_cat" value="true" />
+            </section>
+            <section name="out">
+                <param name="topgo_plot" value="false"/>
+                <param name="make_plots" value="false"/>
+                <param name="cat_genes" value="false"/>
+                <param name="rdata_out" value="false"/>
+            </section>
             <output name="wallenius_tab">
                 <assert_contents>
                     <has_text_matching expression="category.*over_represented_pvalue.*under_represented_pvalue.*numDEInCat.*numInCat.*term.*ontology.*p.adjust.over_represented.*p.adjust.under_represented" />
-                    <has_text_matching expression="GO:0031324.*0.50" />
+                    <has_text_matching expression="GO:0016569.*0.8" />
                 </assert_contents>
             </output>
         </test>
@@ -211,31 +273,76 @@
         <test expect_num_outputs="2">
             <param name="dge_file" value="dge_list.tab" ftype="tabular" />
             <param name="length_file" value="gene_length.tab" ftype="tabular" />
-            <param name="catSource" value="history" />
-            <param name="category_file" value="category.tab" ftype="tabular" />
+            <conditional name="categorySource">
+                <param name="catSource" value="history" />
+                <param name="category_file" value="category.tab" ftype="tabular" />
+            </conditional>
+            <section name="methods">
+                <param name="wallenius" value="true"/>
+                <param name="hypergeometric" value="false"/>
+                <param name="repcnt" value="0"/>
+            </section>
+            <section name="adv">
+                <param name="p_adj_method" value="BH"/>
+                <param name="use_genes_without_cat" value="true" />
+            </section>
+            <section name="out">
+                <param name="topgo_plot" value="false"/>
+                <param name="make_plots" value="true"/>
+                <param name="cat_genes" value="false"/>
+                <param name="rdata_out" value="false"/>
+            </section>
             <param name="make_plots" value="true" />
-            <param name="use_genes_without_cat" value="true" />
             <output name="length_bias_plot" ftype="pdf" file="length_bias_plot.pdf" compare="sim_size" />
         </test>
         <!-- Ensure hypergeometric works -->
         <test expect_num_outputs="2">
             <param name="dge_file" value="dge_list.tab" ftype="tabular" />
             <param name="length_file" value="gene_length.tab" ftype="tabular" />
-            <param name="catSource" value="history" />
-            <param name="category_file" value="category.tab" ftype="tabular" />
-            <param name="use_genes_without_cat" value="true" />
-            <param name="hypergeometric" value="true" />
+            <conditional name="categorySource">
+                <param name="catSource" value="history" />
+                <param name="category_file" value="category.tab" ftype="tabular" />
+            </conditional>
+            <section name="methods">
+                <param name="wallenius" value="true"/>
+                <param name="hypergeometric" value="true"/>
+                <param name="repcnt" value="0"/>
+            </section>
+            <section name="adv">
+                <param name="p_adj_method" value="BH"/>
+                <param name="use_genes_without_cat" value="true" />
+            </section>
+            <section name="out">
+                <param name="topgo_plot" value="false"/>
+                <param name="make_plots" value="false"/>
+                <param name="cat_genes" value="false"/>
+                <param name="rdata_out" value="false"/>
+            </section>
             <output name="nobias_tab" file="nobias.tab" compare="contains" />
         </test>
         <!-- Ensure sampling vs wallenius works -->
         <test expect_num_outputs="4">
             <param name="dge_file" value="dge_list.tab" ftype="tabular" />
             <param name="length_file" value="gene_length.tab" ftype="tabular" />
-            <param name="catSource" value="history" />
-            <param name="category_file" value="category.tab" ftype="tabular" />
-            <param name="use_genes_without_cat" value="true" />
-            <param name="make_plots" value="true" />
-            <param name="repcnt" value="1000" />
+            <conditional name="categorySource">
+                <param name="catSource" value="history" />
+                <param name="category_file" value="category.tab" ftype="tabular" />
+            </conditional>
+            <section name="methods">
+                <param name="wallenius" value="true"/>
+                <param name="hypergeometric" value="false"/>
+                <param name="repcnt" value="1000"/>
+            </section>
+            <section name="adv">
+                <param name="p_adj_method" value="BH"/>
+                <param name="use_genes_without_cat" value="true" />
+            </section>
+            <section name="out">
+                <param name="topgo_plot" value="false"/>
+                <param name="make_plots" value="true"/>
+                <param name="cat_genes" value="false"/>
+                <param name="rdata_out" value="false"/>
+            </section>
             <output name="sampling_tab" file="samp.tab" compare="sim_size" />
             <output name="length_bias_plot" ftype="pdf" file="length_bias_plot.pdf" compare="sim_size" />
             <output name="sample_vs_wallenius_plot" ftype="pdf" file="sample_vs_wallenius_plot.pdf" compare="sim_size" />
@@ -244,14 +351,28 @@
         <test expect_num_outputs="2">
             <param name="dge_file" value="dge_list.tab" ftype="tabular" />
             <param name="length_file" value="gene_length.tab" ftype="tabular" />
-            <param name="catSource" value="history" />
-            <param name="category_file" value="category.tab" ftype="tabular" />
-            <param name="use_genes_without_cat" value="true" />
-            <param name="rdata_out" value="true" />
+            <conditional name="categorySource">
+                <param name="catSource" value="history" />
+                <param name="category_file" value="category.tab" ftype="tabular" />
+            </conditional>
+            <section name="methods">
+                <param name="wallenius" value="true"/>
+                <param name="hypergeometric" value="false"/>
+                <param name="repcnt" value="0"/>
+            </section>
+            <section name="adv">
+                <param name="p_adj_method" value="BH"/>
+                <param name="use_genes_without_cat" value="true" />
+            </section>
+            <section name="out">
+                <param name="topgo_plot" value="false"/>
+                <param name="make_plots" value="false"/>
+                <param name="cat_genes" value="false"/>
+                <param name="rdata_out" value="true"/>
+            </section>
             <output name="rdata" file="goseq_analysis.RData" compare="sim_size" />
         </test>
     </tests>
-
     <help><![CDATA[
 
 .. class:: infomark
@@ -312,8 +433,7 @@
 
 **Outputs**
 
-* This tool outputs a tabular file containing a ranked list of gene categories, similar to below. The default output is the Wallenius method table. If the Sampling and/or Hypergeometric methods are also selected, additional tables are produced.
-* Optionally, this tool can also output a plot of the top 10 over-represented GO categories, some diagnostic plots and an RData file, see **Output Options** above.
+This tool outputs a tabular file containing a ranked list of gene categories, similar to below. The default output is the Wallenius method table. If the Sampling and/or Hypergeometric methods are also selected, additional tables are produced.
 
 Example:
 
@@ -328,6 +448,12 @@
 GO\:0070062  0.000428        0.999808         43           108       extracellular exosome                    CC         0.394825             1
 =========== =============== ================ ============ ========== ======================================== ========== =================== ====================
 
+Optionally, this tool can also output:
+  * a plot of the top 10 over-represented GO categories
+  * some diagnostic plots
+  * a tabular file with the differentially expressed genes in categories (GO/KEGG terms)
+  * an RData file
+
 -----
 
 **Method options**