comparison GO_MWU.R @ 2:5acf9dfdfa27 draft default tip

planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
author cristian
date Wed, 09 Nov 2022 08:57:54 +0000
parents f7287f82602f
children
comparison
equal deleted inserted replaced
1:f7287f82602f 2:5acf9dfdfa27
35 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 35 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
36 36
37 library("getopt") 37 library("getopt")
38 library("tools") 38 library("tools")
39 options(stringAsFactors = FALSE, useFancyQuotes = FALSE) 39 options(stringAsFactors = FALSE, useFancyQuotes = FALSE)
40
41 getScriptPath <- function() {
42 cmd.args <- commandArgs()
43 m <- regexpr("(?<=^--file=).+", cmd.args, perl = TRUE)
44 script.dir <- dirname(regmatches(cmd.args, m))
45 if (length(script.dir) == 0) stop("can't determine script dir: please call the script with Rscript")
46 if (length(script.dir) > 1) stop("can't determine script dir: more than one '--file' argument detected")
47 return(script.dir)
48 }
49
40 args <- commandArgs(trailingOnly = TRUE) 50 args <- commandArgs(trailingOnly = TRUE)
41 51
42 # get options, using the spec as defined by the enclosed list. 52 # get options, using the spec as defined by the enclosed list.
43 # we read the options from the default: commandArgs(TRUE). 53 # we read the options from the default: commandArgs(TRUE).
44 spec <- matrix(c( 54 spec <- matrix(c(
45 "help", "h", 0, "logical", 55 "help", "h", 0, "logical",
46 "scriptdir", "s", 1, "character",
47 "input", "i", 1, "character", 56 "input", "i", 1, "character",
48 "goAnnotations", "a", 1, "character", 57 "goAnnotations", "a", 1, "character",
49 "goDatabase", "g", 1, "character", 58 "goDatabase", "g", 1, "character",
50 "goDivision", "d", 1, "character", 59 "goDivision", "d", 1, "character",
51 "largest", "o", 1, "numeric", 60 "largest", "o", 1, "numeric",
52 "smallest", "m", 1, "numeric", 61 "smallest", "m", 1, "numeric",
62 "absValue", "k", 1, "numeric",
53 "clusterheight", "c", 1, "numeric", 63 "clusterheight", "c", 1, "numeric",
64 "textsize", "e", 1, "numeric",
54 "pcut", "p", 1, "numeric", 65 "pcut", "p", 1, "numeric",
55 "hcut", "t", 1, "numeric", 66 "hcut", "t", 1, "numeric",
67 "l1", "1", 1, "numeric",
68 "l2", "2", 1, "numeric",
69 "l3", "3", 1, "numeric",
56 "version", "v", 0, "character" 70 "version", "v", 0, "character"
57 ), byrow = TRUE, ncol = 4) 71 ), byrow = TRUE, ncol = 4)
58 opt <- getopt(spec) 72 opt <- getopt(spec)
59 73
60 # if help was asked for print a friendly message 74 # if help was asked for print a friendly message
63 cat(getopt(spec, usage = TRUE)) 77 cat(getopt(spec, usage = TRUE))
64 q(status = 1) 78 q(status = 1)
65 } 79 }
66 80
67 if (!is.null(opt$version)) { 81 if (!is.null(opt$version)) {
68 cat("0.1.0\n") 82 cat("0.3.0\n")
69 q(status = 1) 83 q(status = 1)
70 } 84 }
85
86 scriptdir <- getScriptPath()
87
88 ### if running in Rstudio ###
89 # Uncomment this block, change the values and run from here.
90 # opt = list()
91 # opt$input = "heats.csv"
92 # opt$goAnnotations = "amil_defog_iso2go.tab"
93 # opt$goDatabase = "go.obo"
94 # opt$goDivision = "BP"
95
96 ### Optionally uncomment the options below that you want to change and change the default value.
97 # opt$clusterheight <- 0.25
98 # opt$textsize <- 1.2
99 # opt$largest <- 0.1
100 # opt$smallest <- 5
101 # opt$absValue <- -log(0.05,10)
102 # opt$l1 <- 0.1
103 # opt$l2 <- 0.05
104 # opt$l3 <- 0.01
105 # opt$pcut <- 1e-2
106 # opt$hcut <- 0.9
107
71 108
72 # enforce the following required arguments 109 # enforce the following required arguments
73 if (is.null(opt$scriptdir)) {
74 cat("'scriptdir' is required\n")
75 q(status = 1)
76 }
77 if (is.null(opt$input)) { 110 if (is.null(opt$input)) {
78 cat("'input' is required\n") 111 cat("'input' is required\n")
79 q(status = 1) 112 q(status = 1)
80 } 113 }
81 if (is.null(opt$goDatabase)) { 114 if (is.null(opt$goDatabase)) {
89 if (is.null(opt$goDivision)) { 122 if (is.null(opt$goDivision)) {
90 cat("'goDivision' is required\n") 123 cat("'goDivision' is required\n")
91 q(status = 1) 124 q(status = 1)
92 } 125 }
93 if (is.null(opt$clusterheight)) { 126 if (is.null(opt$clusterheight)) {
94 cat("'clusterheight' is required\n") 127 opt$clusterheight <- 0.25
95 q(status = 1) 128 }
129 if (is.null(opt$textsize)) {
130 opt$textsize <- 1.2
96 } 131 }
97 if (is.null(opt$largest)) { 132 if (is.null(opt$largest)) {
98 cat("'largest' is required\n") 133 opt$largest <- 0.1
99 q(status = 1)
100 } 134 }
101 if (is.null(opt$smallest)) { 135 if (is.null(opt$smallest)) {
102 cat("'smallest' is required\n") 136 opt$smallest <- 5
103 q(status = 1) 137 }
138 if (is.null(opt$absValue)) {
139 opt$absValue <- -log(0.05, 10)
140 }
141 if (is.null(opt$l1)) {
142 opt$l1 <- 0.1
143 }
144 if (is.null(opt$l2)) {
145 opt$l2 <- 0.05
146 }
147 if (is.null(opt$l3)) {
148 opt$l3 <- 0.01
104 } 149 }
105 if (is.null(opt$pcut)) { 150 if (is.null(opt$pcut)) {
106 cat("'pcut' is required\n") 151 opt$pcut <- 1e-2
107 q(status = 1)
108 } 152 }
109 if (is.null(opt$hcut)) { 153 if (is.null(opt$hcut)) {
110 cat("'hcut' is required\n") 154 opt$hcut <- 0.9
111 q(status = 1)
112 } 155 }
113 156
114 source_local <- function(fname) { 157 source_local <- function(fname) {
115 # argv <- commandArgs(trailingOnly = FALSE) 158 # argv <- commandArgs(trailingOnly = FALSE)
116 # base_dir <- dirname(substring(argv[grep("--file = ", argv)], 8)) 159 # base_dir <- dirname(substring(argv[grep("--file = ", argv)], 8))
117 base_dir <- opt$scriptdir 160 base_dir <- scriptdir
118 source(paste(base_dir, fname, sep = "/")) 161 source(paste(base_dir, fname, sep = "/"))
119 } 162 }
120 163
121 source_local("gomwu.functions.R") 164 source_local("gomwu.functions.R")
122 165
134 # It might take a few minutes for MF and BP. Do not rerun it if you just want 177 # It might take a few minutes for MF and BP. Do not rerun it if you just want
135 # to replot the data with different cutoffs, go straight to gomwuPlot. If you 178 # to replot the data with different cutoffs, go straight to gomwuPlot. If you
136 # change any of the numeric values below, delete the files that were generated 179 # change any of the numeric values below, delete the files that were generated
137 # in previos runs first. 180 # in previos runs first.
138 181
139 gomwuStats(opt$input, opt$goDatabase, opt$goAnnotations, opt$goDivision, opt$scriptdir, 182 gomwuStats(opt$input, opt$goDatabase, opt$goAnnotations, opt$goDivision, scriptdir,
140 # replace with full path to perl executable if it is not in your system's PATH already 183 # replace with full path to perl executable if it is not in your system's PATH already
141 perlPath = "perl", 184 perlPath = "perl",
142 # a GO category will not be considered if it contains more than this fraction of the total number of genes 185 # a GO category will not be considered if it contains more than this fraction of the total number of genes
143 largest = opt$largest, 186 largest = opt$largest,
144 smallest = opt$smallest, # a GO category should contain at least this many genes to be considered. 187 smallest = opt$smallest, # a GO category should contain at least this many genes to be considered.
154 197
155 198
156 # ----------- Plotting results 199 # ----------- Plotting results
157 200
158 # change this to a pdf output 201 # change this to a pdf output
159 pdf(paste0(dir,"/","Rplots.pdf")) 202 pdf(paste0(dir, "/", "Rplots.pdf"), width = 7, height = 7)
203 # png(paste0(dir,"/","Rplots.png"),res=100)
160 204
161 results <- gomwuPlot(opt$input, opt$goAnnotations, opt$goDivision, 205 results <- gomwuPlot(opt$input, opt$goAnnotations, opt$goDivision,
162 # genes with the measure value exceeding this will be counted as "good genes". 206 # genes with the measure value exceeding this will be counted as "good genes".
163 # This setting is for signed log-pvalues. Specify absValue=0.001 if you are doing 207 # This setting is for signed log-pvalues. Specify absValue=0.001 if you are doing
164 # Fisher's exact test for standard GO enrichment or analyzing a WGCNA module (all non-zero genes = "good genes"). 208 # Fisher's exact test for standard GO enrichment or analyzing a WGCNA module (all non-zero genes = "good genes").
165 absValue = -log(0.05, 10), 209 absValue = opt$absValue,
166 # absValue = 1, # un-remark this if you are using log2-fold changes 210 # absValue = 1, # un-remark this if you are using log2-fold changes
167 # FDR threshold for plotting. Specify level1=1 to plot all GO categories containing genes exceeding the absValue. 211 # FDR threshold for plotting. Specify level1=1 to plot all GO categories containing genes exceeding the absValue.
168 level1 = 0.1, 212 level1 = opt$l1,
169 level2 = 0.05, # FDR cutoff to print in regular (not italic) font. 213 level2 = opt$l2, # FDR cutoff to print in regular (not italic) font.
170 level3 = 0.01, # FDR cutoff to print in large bold font. 214 level3 = opt$l3, # FDR cutoff to print in large bold font.
171 # decrease to fit more on one page, or increase (after rescaling the plot so the tree fits the text) 215 # decrease to fit more on one page, or increase (after rescaling the plot so the tree fits the text)
172 # for better "word cloud" effect 216 # for better "word cloud" effect
173 txtsize = 1.2, 217 txtsize = opt$textsize,
174 treeHeight = 0.5, # height of the hierarchical clustering tree 218 treeHeight = 0.5, # height of the hierarchical clustering tree
175 # colors = c("dodgerblue2","firebrick1","skyblue2","lightcoral") 219 # colors = c("dodgerblue2","firebrick1","skyblue2","lightcoral")
176 # these are default colors, uncomment and change if needed 220 # these are default colors, uncomment and change if needed
177 ) 221 )
222 dev.off()
178 # manually rescale the plot so the tree matches the text 223 # manually rescale the plot so the tree matches the text
179 # if there are too many categories displayed, try make it more stringent with level1 = 0.05,level2=0.01,level3=0.001. 224 # if there are too many categories displayed, try make it more stringent with level1 = 0.05,level2=0.01,level3=0.001.
180 225
181 # text representation of results, with actual adjusted p-values 226 # text representation of results, with actual adjusted p-values
182 write.table(results[[1]], paste0(dir, "/", "results.tsv"), sep = "\t") 227 write.table(results[[1]], paste0(dir, "/", "results.tsv"), sep = "\t")
221 if (bestrr$pval[best] <= pcut) { 266 if (bestrr$pval[best] <= pcut) {
222 annots <- c(annots, sub("\\d+\\/\\d+ ", "", row.names(bestrr)[best])) 267 annots <- c(annots, sub("\\d+\\/\\d+ ", "", row.names(bestrr)[best]))
223 } 268 }
224 } 269 }
225 270
226 mwus <- read.table(paste0(dir,"/",paste("MWU", opt$goDivision, name, sep = "_"), ".", ext), header = T) 271 mwus <- read.table(paste0(dir, "/", paste("MWU", opt$goDivision, name, sep = "_"), ".", ext), header = T)
227 best_GOs <- mwus[mwus$name %in% annots, ] 272 best_GOs <- mwus[mwus$name %in% annots, ]
228 write.table(best_GOs, paste0(dir, "/","best_go.tsv"), sep = "\t") 273 write.table(best_GOs, paste0(dir, "/", "best_go.tsv"), sep = "\t", row.names = FALSE)
229 dev.off()