diff goseq.r @ 4:ae39895af5fe draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit 0798278a90c08228a386516881680b328fc33f0c
author iuc
date Sun, 30 Sep 2018 09:27:05 -0400
parents 783e8b70b047
children bbcf5f7f2af2
line wrap: on
line diff
--- a/goseq.r	Mon Sep 24 06:29:03 2018 -0400
+++ b/goseq.r	Sun Sep 30 09:27:05 2018 -0400
@@ -40,7 +40,9 @@
 length_file = args$length_file
 genome = args$genome
 gene_id = args$gene_id
+wallenius_tab = args$wallenius_tab
 sampling_tab = args$sampling_tab
+nobias_tab = args$nobias_tab
 length_bias_plot = args$length_bias_plot
 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot
 repcnt = args$repcnt
@@ -107,7 +109,7 @@
 results <- list()
 
 # wallenius approximation of p-values
-if (!is.null(args$wallenius_tab)) {
+if (wallenius_tab != FALSE) {
   GO.wall=goseq(pwf, genome = genome, id = gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
   GO.wall$p.adjust.over_represented = p.adjust(GO.wall$over_represented_pvalue, method=p_adj_method)
   GO.wall$p.adjust.under_represented = p.adjust(GO.wall$under_represented_pvalue, method=p_adj_method)
@@ -116,7 +118,7 @@
 }
 
 # hypergeometric (no length bias correction)
-if (!is.null(args$nobias_tab)) {
+if (nobias_tab != FALSE) {
   GO.nobias=goseq(pwf, genome = genome, id = gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
   GO.nobias$p.adjust.over_represented = p.adjust(GO.nobias$over_represented_pvalue, method=p_adj_method)
   GO.nobias$p.adjust.under_represented = p.adjust(GO.nobias$under_represented_pvalue, method=p_adj_method)
@@ -149,6 +151,7 @@
 }
 
 if (!is.null(args$top_plot)) {
+  cats_title <- gsub("GO:","", args$fetch_cats)
   # modified from https://bioinformatics-core-shared-training.github.io/cruk-summer-school-2018/RNASeq2018/html/06_Gene_set_testing.nb.html
   pdf("top10.pdf")
   for (m in names(results)) {
@@ -156,12 +159,12 @@
       top_n(10, wt=-p.adjust.over_represented)  %>%
       mutate(hitsPerc=numDEInCat*100/numInCat) %>%
       ggplot(aes(x=hitsPerc,
-                   y=term,
+                   y=substr(term, 1, 40), # only use 1st 40 chars of terms otherwise squashes plot
                    colour=p.adjust.over_represented,
                    size=numDEInCat)) +
       geom_point() +
       expand_limits(x=0) +
-      labs(x="% DE in category", y="Category", colour="adj. P value", size="Count", title=paste("Top over-represented categories in", fetch_cats), subtitle=paste(m, " method")) +
+      labs(x="% DE in category", y="Category", colour="adj. P value", size="Count", title=paste("Top over-represented categories in", cats_title), subtitle=paste(m, " method")) +
       theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))
     print(p)
   }