diff fgsea.R @ 0:9bb7943b5263 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fgsea commit 9a6eda48463d6c19e9c5f3f2f8109f33de74855d-dirty
author iuc
date Sat, 20 Oct 2018 05:47:18 -0400
parents
children 17eb1e0d711f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fgsea.R	Sat Oct 20 05:47:18 2018 -0400
@@ -0,0 +1,88 @@
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+suppressPackageStartupMessages({
+  library(fgsea)
+  library(ggplot2)
+  library(optparse)
+})
+
+option_list <- list(
+  make_option(c("-rnk_file", "--rnk_file"), type="character", help="Path to ranked genes file"),
+  make_option(c("-header", "--header"), type="logical", help = "Does ranked genes file have a header"),
+  make_option(c("-sets_file", "--sets_file"), type="character", help = "Path to gene sets file"),
+  make_option(c("-gmt", "--gmt"), type="logical", help = "Is the sets file in GMT format"),
+  make_option(c("-out_tab","--out_tab"), type="character", help="Path to output file"),
+  make_option(c("-min_size", "--min_size"), type="integer", help="Minimal size of a gene set to test. All pathways below the threshold are excluded."),
+  make_option(c("-max_size", "--max_size"), type="integer", help="Maximal size of a gene set to test. All pathways above the threshold are excluded."),
+  make_option(c("-n_perm", "--n_perm"), type="integer", help="Number of permutations to do. Minimial possible nominal p-value is about 1/nperm"),
+  make_option(c("-rda_opt", "--rda_opt"), type="logical", help="Output RData file"),
+  make_option(c("-plot_opt", "--plot_opt"), type="logical", help="Output plot"),
+  make_option(c("-top_num", "--top_num"), type="integer", help="Top number of pathways to plot")
+)
+
+parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
+args = parse_args(parser)
+
+# Vars:
+rnk_file = args$rnk_file
+if (args$header) {
+  header = TRUE
+} else {
+  header = FALSE
+}
+sets_file = args$sets_file
+gmt = args$gmt
+out_tab = args$out_tab
+min_size = args$min_size
+max_size = args$max_size
+n_perm = args$n_perm
+rda_opt = args$rda_opt
+plot_opt = args$plot_opt
+top_num = args$top_num
+
+## Basically using the steps from the fgsea vignette
+rankTab <- read.table(rnk_file, header=header, colClasses = c("character", "numeric"))
+
+ranks <-rankTab[,2]
+names(ranks) <- rankTab[,1]
+
+if (gmt) {
+  pathways <- gmtPathways(sets_file)
+} else {
+  pathways <- load(sets_file)
+  pathways <- get(pathways)
+}
+
+fgseaRes <- fgsea(pathways, ranks, minSize=min_size, maxSize=max_size, nperm=n_perm)
+fgseaRes <- fgseaRes[order(pval), ]
+# Convert leadingEdge column from list to character to output
+fgseaRes$leadingEdge <- sapply(fgseaRes$leadingEdge, toString)
+
+write.table(fgseaRes, out_tab, sep="\t", row.names=FALSE, quote=FALSE)
+
+if (plot_opt) {
+  pdf("fgsea_plots.pdf", width=8)
+
+  topPathways <- head(fgseaRes, n=top_num)
+  topPathways <- topPathways$pathway
+
+  ## Make summary table plot for top pathways
+  plotGseaTable(pathways[topPathways], ranks, fgseaRes, gseaParam = 0.5,
+  colwidths = c(5.3,3,0.7, 0.9, 0.9))
+
+  # Make enrichment plots for top pathways
+  for (i in topPathways) {
+    p <- plotEnrichment(pathways[[i]], ranks) + labs(title=i)
+    print(p)
+  }
+
+  dev.off()
+}
+
+## Output RData file
+if (rda_opt) {
+    save.image(file = "fgsea_analysis.RData")
+}
\ No newline at end of file