Mercurial > repos > cristian > rbgoa
diff GO_MWU.R @ 2:5acf9dfdfa27 draft default tip
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
author | cristian |
---|---|
date | Wed, 09 Nov 2022 08:57:54 +0000 |
parents | f7287f82602f |
children |
line wrap: on
line diff
--- a/GO_MWU.R Tue Apr 19 08:28:43 2022 +0000 +++ b/GO_MWU.R Wed Nov 09 08:57:54 2022 +0000 @@ -37,22 +37,36 @@ library("getopt") library("tools") options(stringAsFactors = FALSE, useFancyQuotes = FALSE) + +getScriptPath <- function() { + cmd.args <- commandArgs() + m <- regexpr("(?<=^--file=).+", cmd.args, perl = TRUE) + script.dir <- dirname(regmatches(cmd.args, m)) + if (length(script.dir) == 0) stop("can't determine script dir: please call the script with Rscript") + if (length(script.dir) > 1) stop("can't determine script dir: more than one '--file' argument detected") + return(script.dir) +} + args <- commandArgs(trailingOnly = TRUE) # get options, using the spec as defined by the enclosed list. # we read the options from the default: commandArgs(TRUE). spec <- matrix(c( "help", "h", 0, "logical", - "scriptdir", "s", 1, "character", "input", "i", 1, "character", "goAnnotations", "a", 1, "character", "goDatabase", "g", 1, "character", "goDivision", "d", 1, "character", "largest", "o", 1, "numeric", "smallest", "m", 1, "numeric", + "absValue", "k", 1, "numeric", "clusterheight", "c", 1, "numeric", + "textsize", "e", 1, "numeric", "pcut", "p", 1, "numeric", "hcut", "t", 1, "numeric", + "l1", "1", 1, "numeric", + "l2", "2", 1, "numeric", + "l3", "3", 1, "numeric", "version", "v", 0, "character" ), byrow = TRUE, ncol = 4) opt <- getopt(spec) @@ -65,15 +79,34 @@ } if (!is.null(opt$version)) { - cat("0.1.0\n") + cat("0.3.0\n") q(status = 1) } +scriptdir <- getScriptPath() + +### if running in Rstudio ### +# Uncomment this block, change the values and run from here. +# opt = list() +# opt$input = "heats.csv" +# opt$goAnnotations = "amil_defog_iso2go.tab" +# opt$goDatabase = "go.obo" +# opt$goDivision = "BP" + +### Optionally uncomment the options below that you want to change and change the default value. +# opt$clusterheight <- 0.25 +# opt$textsize <- 1.2 +# opt$largest <- 0.1 +# opt$smallest <- 5 +# opt$absValue <- -log(0.05,10) +# opt$l1 <- 0.1 +# opt$l2 <- 0.05 +# opt$l3 <- 0.01 +# opt$pcut <- 1e-2 +# opt$hcut <- 0.9 + + # enforce the following required arguments -if (is.null(opt$scriptdir)) { - cat("'scriptdir' is required\n") - q(status = 1) -} if (is.null(opt$input)) { cat("'input' is required\n") q(status = 1) @@ -91,30 +124,40 @@ q(status = 1) } if (is.null(opt$clusterheight)) { - cat("'clusterheight' is required\n") - q(status = 1) + opt$clusterheight <- 0.25 +} +if (is.null(opt$textsize)) { + opt$textsize <- 1.2 } if (is.null(opt$largest)) { - cat("'largest' is required\n") - q(status = 1) + opt$largest <- 0.1 } if (is.null(opt$smallest)) { - cat("'smallest' is required\n") - q(status = 1) + opt$smallest <- 5 +} +if (is.null(opt$absValue)) { + opt$absValue <- -log(0.05, 10) +} +if (is.null(opt$l1)) { + opt$l1 <- 0.1 +} +if (is.null(opt$l2)) { + opt$l2 <- 0.05 +} +if (is.null(opt$l3)) { + opt$l3 <- 0.01 } if (is.null(opt$pcut)) { - cat("'pcut' is required\n") - q(status = 1) + opt$pcut <- 1e-2 } if (is.null(opt$hcut)) { - cat("'hcut' is required\n") - q(status = 1) + opt$hcut <- 0.9 } source_local <- function(fname) { # argv <- commandArgs(trailingOnly = FALSE) # base_dir <- dirname(substring(argv[grep("--file = ", argv)], 8)) - base_dir <- opt$scriptdir + base_dir <- scriptdir source(paste(base_dir, fname, sep = "/")) } @@ -136,7 +179,7 @@ # change any of the numeric values below, delete the files that were generated # in previos runs first. -gomwuStats(opt$input, opt$goDatabase, opt$goAnnotations, opt$goDivision, opt$scriptdir, +gomwuStats(opt$input, opt$goDatabase, opt$goAnnotations, opt$goDivision, scriptdir, # replace with full path to perl executable if it is not in your system's PATH already perlPath = "perl", # a GO category will not be considered if it contains more than this fraction of the total number of genes @@ -156,25 +199,27 @@ # ----------- Plotting results # change this to a pdf output -pdf(paste0(dir,"/","Rplots.pdf")) +pdf(paste0(dir, "/", "Rplots.pdf"), width = 7, height = 7) +# png(paste0(dir,"/","Rplots.png"),res=100) results <- gomwuPlot(opt$input, opt$goAnnotations, opt$goDivision, # genes with the measure value exceeding this will be counted as "good genes". # This setting is for signed log-pvalues. Specify absValue=0.001 if you are doing # Fisher's exact test for standard GO enrichment or analyzing a WGCNA module (all non-zero genes = "good genes"). - absValue = -log(0.05, 10), + absValue = opt$absValue, # absValue = 1, # un-remark this if you are using log2-fold changes # FDR threshold for plotting. Specify level1=1 to plot all GO categories containing genes exceeding the absValue. - level1 = 0.1, - level2 = 0.05, # FDR cutoff to print in regular (not italic) font. - level3 = 0.01, # FDR cutoff to print in large bold font. + level1 = opt$l1, + level2 = opt$l2, # FDR cutoff to print in regular (not italic) font. + level3 = opt$l3, # FDR cutoff to print in large bold font. # decrease to fit more on one page, or increase (after rescaling the plot so the tree fits the text) # for better "word cloud" effect - txtsize = 1.2, + txtsize = opt$textsize, treeHeight = 0.5, # height of the hierarchical clustering tree # colors = c("dodgerblue2","firebrick1","skyblue2","lightcoral") # these are default colors, uncomment and change if needed ) +dev.off() # manually rescale the plot so the tree matches the text # if there are too many categories displayed, try make it more stringent with level1 = 0.05,level2=0.01,level3=0.001. @@ -223,7 +268,6 @@ } } -mwus <- read.table(paste0(dir,"/",paste("MWU", opt$goDivision, name, sep = "_"), ".", ext), header = T) +mwus <- read.table(paste0(dir, "/", paste("MWU", opt$goDivision, name, sep = "_"), ".", ext), header = T) best_GOs <- mwus[mwus$name %in% annots, ] -write.table(best_GOs, paste0(dir, "/","best_go.tsv"), sep = "\t") -dev.off() +write.table(best_GOs, paste0(dir, "/", "best_go.tsv"), sep = "\t", row.names = FALSE)