diff GO_MWU.R @ 2:5acf9dfdfa27 draft default tip

planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
author cristian
date Wed, 09 Nov 2022 08:57:54 +0000
parents f7287f82602f
children
line wrap: on
line diff
--- a/GO_MWU.R	Tue Apr 19 08:28:43 2022 +0000
+++ b/GO_MWU.R	Wed Nov 09 08:57:54 2022 +0000
@@ -37,22 +37,36 @@
 library("getopt")
 library("tools")
 options(stringAsFactors = FALSE, useFancyQuotes = FALSE)
+
+getScriptPath <- function() {
+    cmd.args <- commandArgs()
+    m <- regexpr("(?<=^--file=).+", cmd.args, perl = TRUE)
+    script.dir <- dirname(regmatches(cmd.args, m))
+    if (length(script.dir) == 0) stop("can't determine script dir: please call the script with Rscript")
+    if (length(script.dir) > 1) stop("can't determine script dir: more than one '--file' argument detected")
+    return(script.dir)
+}
+
 args <- commandArgs(trailingOnly = TRUE)
 
 # get options, using the spec as defined by the enclosed list.
 # we read the options from the default: commandArgs(TRUE).
 spec <- matrix(c(
     "help", "h", 0, "logical",
-    "scriptdir", "s", 1, "character",
     "input", "i", 1, "character",
     "goAnnotations", "a", 1, "character",
     "goDatabase", "g", 1, "character",
     "goDivision", "d", 1, "character",
     "largest", "o", 1, "numeric",
     "smallest", "m", 1, "numeric",
+    "absValue", "k", 1, "numeric",
     "clusterheight", "c", 1, "numeric",
+    "textsize", "e", 1, "numeric",
     "pcut", "p", 1, "numeric",
     "hcut", "t", 1, "numeric",
+    "l1", "1", 1, "numeric",
+    "l2", "2", 1, "numeric",
+    "l3", "3", 1, "numeric",
     "version", "v", 0, "character"
 ), byrow = TRUE, ncol = 4)
 opt <- getopt(spec)
@@ -65,15 +79,34 @@
 }
 
 if (!is.null(opt$version)) {
-    cat("0.1.0\n")
+    cat("0.3.0\n")
     q(status = 1)
 }
 
+scriptdir <- getScriptPath()
+
+### if running in Rstudio ###
+# Uncomment this block, change the values and run from here.
+# opt = list()
+# opt$input = "heats.csv"
+# opt$goAnnotations = "amil_defog_iso2go.tab"
+# opt$goDatabase = "go.obo"
+# opt$goDivision = "BP"
+
+### Optionally uncomment the options below that you want to change and change the default value.
+# opt$clusterheight <- 0.25
+# opt$textsize <- 1.2
+# opt$largest <- 0.1
+# opt$smallest <- 5
+# opt$absValue <- -log(0.05,10)
+# opt$l1 <- 0.1
+# opt$l2 <- 0.05
+# opt$l3 <- 0.01
+# opt$pcut <- 1e-2
+# opt$hcut <- 0.9
+
+
 # enforce the following required arguments
-if (is.null(opt$scriptdir)) {
-    cat("'scriptdir' is required\n")
-    q(status = 1)
-}
 if (is.null(opt$input)) {
     cat("'input' is required\n")
     q(status = 1)
@@ -91,30 +124,40 @@
     q(status = 1)
 }
 if (is.null(opt$clusterheight)) {
-    cat("'clusterheight' is required\n")
-    q(status = 1)
+    opt$clusterheight <- 0.25
+}
+if (is.null(opt$textsize)) {
+    opt$textsize <- 1.2
 }
 if (is.null(opt$largest)) {
-    cat("'largest' is required\n")
-    q(status = 1)
+    opt$largest <- 0.1
 }
 if (is.null(opt$smallest)) {
-    cat("'smallest' is required\n")
-    q(status = 1)
+    opt$smallest <- 5
+}
+if (is.null(opt$absValue)) {
+    opt$absValue <- -log(0.05, 10)
+}
+if (is.null(opt$l1)) {
+    opt$l1 <- 0.1
+}
+if (is.null(opt$l2)) {
+    opt$l2 <- 0.05
+}
+if (is.null(opt$l3)) {
+    opt$l3 <- 0.01
 }
 if (is.null(opt$pcut)) {
-    cat("'pcut' is required\n")
-    q(status = 1)
+    opt$pcut <- 1e-2
 }
 if (is.null(opt$hcut)) {
-    cat("'hcut' is required\n")
-    q(status = 1)
+    opt$hcut <- 0.9
 }
 
 source_local <- function(fname) {
     # argv <- commandArgs(trailingOnly = FALSE)
     # base_dir <- dirname(substring(argv[grep("--file = ", argv)], 8))
-    base_dir <- opt$scriptdir
+    base_dir <- scriptdir
     source(paste(base_dir, fname, sep = "/"))
 }
 
@@ -136,7 +179,7 @@
 # change any of the numeric values below, delete the files that were generated
 # in previos runs first.
 
-gomwuStats(opt$input, opt$goDatabase, opt$goAnnotations, opt$goDivision, opt$scriptdir,
+gomwuStats(opt$input, opt$goDatabase, opt$goAnnotations, opt$goDivision, scriptdir,
     # replace with full path to perl executable if it is not in your system's PATH already
     perlPath = "perl",
     # a GO category will not be considered if it contains more than this fraction of the total number of genes
@@ -156,25 +199,27 @@
 # ----------- Plotting results
 
 # change this to a pdf output
-pdf(paste0(dir,"/","Rplots.pdf"))
+pdf(paste0(dir, "/", "Rplots.pdf"), width = 7, height = 7)
+# png(paste0(dir,"/","Rplots.png"),res=100)
 
 results <- gomwuPlot(opt$input, opt$goAnnotations, opt$goDivision,
     # genes with the measure value exceeding this will be counted as "good genes".
     # This setting is for signed log-pvalues. Specify absValue=0.001 if you are doing
     # Fisher's exact test for standard GO enrichment or analyzing a WGCNA module (all non-zero genes = "good genes").
-    absValue = -log(0.05, 10),
+    absValue = opt$absValue,
     # 	absValue = 1, # un-remark this if you are using log2-fold changes
     # FDR threshold for plotting. Specify level1=1 to plot all GO categories containing genes exceeding the absValue.
-    level1 = 0.1,
-    level2 = 0.05, # FDR cutoff to print in regular (not italic) font.
-    level3 = 0.01, # FDR cutoff to print in large bold font.
+    level1 = opt$l1,
+    level2 = opt$l2, # FDR cutoff to print in regular (not italic) font.
+    level3 = opt$l3, # FDR cutoff to print in large bold font.
     # decrease to fit more on one page, or increase (after rescaling the plot so the tree fits the text)
     # for better "word cloud" effect
-    txtsize = 1.2,
+    txtsize = opt$textsize,
     treeHeight = 0.5, # height of the hierarchical clustering tree
     # 	colors = c("dodgerblue2","firebrick1","skyblue2","lightcoral")
     # these are default colors, uncomment and change if needed
 )
+dev.off()
 # manually rescale the plot so the tree matches the text
 # if there are too many categories displayed, try make it more stringent with level1 = 0.05,level2=0.01,level3=0.001.
 
@@ -223,7 +268,6 @@
     }
 }
 
-mwus <- read.table(paste0(dir,"/",paste("MWU", opt$goDivision, name, sep = "_"), ".", ext), header = T)
+mwus <- read.table(paste0(dir, "/", paste("MWU", opt$goDivision, name, sep = "_"), ".", ext), header = T)
 best_GOs <- mwus[mwus$name %in% annots, ]
-write.table(best_GOs, paste0(dir, "/","best_go.tsv"), sep = "\t")
-dev.off()
+write.table(best_GOs, paste0(dir, "/", "best_go.tsv"), sep = "\t", row.names = FALSE)