# HG changeset patch # User iuc # Date 1538314025 14400 # Node ID ae39895af5fe825367132273d5abf197441a15a0 # Parent 783e8b70b047e652178297717fad8f36976ab00f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit 0798278a90c08228a386516881680b328fc33f0c diff -r 783e8b70b047 -r ae39895af5fe goseq.r --- a/goseq.r Mon Sep 24 06:29:03 2018 -0400 +++ b/goseq.r Sun Sep 30 09:27:05 2018 -0400 @@ -40,7 +40,9 @@ length_file = args$length_file genome = args$genome gene_id = args$gene_id +wallenius_tab = args$wallenius_tab sampling_tab = args$sampling_tab +nobias_tab = args$nobias_tab length_bias_plot = args$length_bias_plot sample_vs_wallenius_plot = args$sample_vs_wallenius_plot repcnt = args$repcnt @@ -107,7 +109,7 @@ results <- list() # wallenius approximation of p-values -if (!is.null(args$wallenius_tab)) { +if (wallenius_tab != FALSE) { GO.wall=goseq(pwf, genome = genome, id = gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) GO.wall$p.adjust.over_represented = p.adjust(GO.wall$over_represented_pvalue, method=p_adj_method) GO.wall$p.adjust.under_represented = p.adjust(GO.wall$under_represented_pvalue, method=p_adj_method) @@ -116,7 +118,7 @@ } # hypergeometric (no length bias correction) -if (!is.null(args$nobias_tab)) { +if (nobias_tab != FALSE) { GO.nobias=goseq(pwf, genome = genome, id = gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) GO.nobias$p.adjust.over_represented = p.adjust(GO.nobias$over_represented_pvalue, method=p_adj_method) GO.nobias$p.adjust.under_represented = p.adjust(GO.nobias$under_represented_pvalue, method=p_adj_method) @@ -149,6 +151,7 @@ } if (!is.null(args$top_plot)) { + cats_title <- gsub("GO:","", args$fetch_cats) # modified from https://bioinformatics-core-shared-training.github.io/cruk-summer-school-2018/RNASeq2018/html/06_Gene_set_testing.nb.html pdf("top10.pdf") for (m in names(results)) { @@ -156,12 +159,12 @@ top_n(10, wt=-p.adjust.over_represented) %>% mutate(hitsPerc=numDEInCat*100/numInCat) %>% ggplot(aes(x=hitsPerc, - y=term, + y=substr(term, 1, 40), # only use 1st 40 chars of terms otherwise squashes plot colour=p.adjust.over_represented, size=numDEInCat)) + geom_point() + expand_limits(x=0) + - labs(x="% DE in category", y="Category", colour="adj. P value", size="Count", title=paste("Top over-represented categories in", fetch_cats), subtitle=paste(m, " method")) + + labs(x="% DE in category", y="Category", colour="adj. P value", size="Count", title=paste("Top over-represented categories in", cats_title), subtitle=paste(m, " method")) + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) print(p) } diff -r 783e8b70b047 -r ae39895af5fe goseq.xml --- a/goseq.xml Mon Sep 24 06:29:03 2018 -0400 +++ b/goseq.xml Sun Sep 30 09:27:05 2018 -0400 @@ -1,4 +1,4 @@ - + tests for overrepresented gene categories r-optparse @@ -313,7 +313,7 @@ **Outputs** * This tool outputs a tabular file containing a ranked list of gene categories, similar to below. The default output is the Wallenius method table. If the Sampling and/or Hypergeometric methods are also selected, additional tables are produced. -* Optionally, this tool can also output a plot of the top 10 over-represented categories, some diagnostic plots and an RData file, see **Output Options** above. +* Optionally, this tool can also output a plot of the top 10 over-represented GO categories, some diagnostic plots and an RData file, see **Output Options** above. Example: diff -r 783e8b70b047 -r ae39895af5fe test-data/topgo.pdf Binary file test-data/topgo.pdf has changed