Mercurial > repos > cristian > rbgoa
comparison GO_MWU.R @ 2:5acf9dfdfa27 draft default tip
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
author | cristian |
---|---|
date | Wed, 09 Nov 2022 08:57:54 +0000 |
parents | f7287f82602f |
children |
comparison
equal
deleted
inserted
replaced
1:f7287f82602f | 2:5acf9dfdfa27 |
---|---|
35 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 35 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
36 | 36 |
37 library("getopt") | 37 library("getopt") |
38 library("tools") | 38 library("tools") |
39 options(stringAsFactors = FALSE, useFancyQuotes = FALSE) | 39 options(stringAsFactors = FALSE, useFancyQuotes = FALSE) |
40 | |
41 getScriptPath <- function() { | |
42 cmd.args <- commandArgs() | |
43 m <- regexpr("(?<=^--file=).+", cmd.args, perl = TRUE) | |
44 script.dir <- dirname(regmatches(cmd.args, m)) | |
45 if (length(script.dir) == 0) stop("can't determine script dir: please call the script with Rscript") | |
46 if (length(script.dir) > 1) stop("can't determine script dir: more than one '--file' argument detected") | |
47 return(script.dir) | |
48 } | |
49 | |
40 args <- commandArgs(trailingOnly = TRUE) | 50 args <- commandArgs(trailingOnly = TRUE) |
41 | 51 |
42 # get options, using the spec as defined by the enclosed list. | 52 # get options, using the spec as defined by the enclosed list. |
43 # we read the options from the default: commandArgs(TRUE). | 53 # we read the options from the default: commandArgs(TRUE). |
44 spec <- matrix(c( | 54 spec <- matrix(c( |
45 "help", "h", 0, "logical", | 55 "help", "h", 0, "logical", |
46 "scriptdir", "s", 1, "character", | |
47 "input", "i", 1, "character", | 56 "input", "i", 1, "character", |
48 "goAnnotations", "a", 1, "character", | 57 "goAnnotations", "a", 1, "character", |
49 "goDatabase", "g", 1, "character", | 58 "goDatabase", "g", 1, "character", |
50 "goDivision", "d", 1, "character", | 59 "goDivision", "d", 1, "character", |
51 "largest", "o", 1, "numeric", | 60 "largest", "o", 1, "numeric", |
52 "smallest", "m", 1, "numeric", | 61 "smallest", "m", 1, "numeric", |
62 "absValue", "k", 1, "numeric", | |
53 "clusterheight", "c", 1, "numeric", | 63 "clusterheight", "c", 1, "numeric", |
64 "textsize", "e", 1, "numeric", | |
54 "pcut", "p", 1, "numeric", | 65 "pcut", "p", 1, "numeric", |
55 "hcut", "t", 1, "numeric", | 66 "hcut", "t", 1, "numeric", |
67 "l1", "1", 1, "numeric", | |
68 "l2", "2", 1, "numeric", | |
69 "l3", "3", 1, "numeric", | |
56 "version", "v", 0, "character" | 70 "version", "v", 0, "character" |
57 ), byrow = TRUE, ncol = 4) | 71 ), byrow = TRUE, ncol = 4) |
58 opt <- getopt(spec) | 72 opt <- getopt(spec) |
59 | 73 |
60 # if help was asked for print a friendly message | 74 # if help was asked for print a friendly message |
63 cat(getopt(spec, usage = TRUE)) | 77 cat(getopt(spec, usage = TRUE)) |
64 q(status = 1) | 78 q(status = 1) |
65 } | 79 } |
66 | 80 |
67 if (!is.null(opt$version)) { | 81 if (!is.null(opt$version)) { |
68 cat("0.1.0\n") | 82 cat("0.3.0\n") |
69 q(status = 1) | 83 q(status = 1) |
70 } | 84 } |
85 | |
86 scriptdir <- getScriptPath() | |
87 | |
88 ### if running in Rstudio ### | |
89 # Uncomment this block, change the values and run from here. | |
90 # opt = list() | |
91 # opt$input = "heats.csv" | |
92 # opt$goAnnotations = "amil_defog_iso2go.tab" | |
93 # opt$goDatabase = "go.obo" | |
94 # opt$goDivision = "BP" | |
95 | |
96 ### Optionally uncomment the options below that you want to change and change the default value. | |
97 # opt$clusterheight <- 0.25 | |
98 # opt$textsize <- 1.2 | |
99 # opt$largest <- 0.1 | |
100 # opt$smallest <- 5 | |
101 # opt$absValue <- -log(0.05,10) | |
102 # opt$l1 <- 0.1 | |
103 # opt$l2 <- 0.05 | |
104 # opt$l3 <- 0.01 | |
105 # opt$pcut <- 1e-2 | |
106 # opt$hcut <- 0.9 | |
107 | |
71 | 108 |
72 # enforce the following required arguments | 109 # enforce the following required arguments |
73 if (is.null(opt$scriptdir)) { | |
74 cat("'scriptdir' is required\n") | |
75 q(status = 1) | |
76 } | |
77 if (is.null(opt$input)) { | 110 if (is.null(opt$input)) { |
78 cat("'input' is required\n") | 111 cat("'input' is required\n") |
79 q(status = 1) | 112 q(status = 1) |
80 } | 113 } |
81 if (is.null(opt$goDatabase)) { | 114 if (is.null(opt$goDatabase)) { |
89 if (is.null(opt$goDivision)) { | 122 if (is.null(opt$goDivision)) { |
90 cat("'goDivision' is required\n") | 123 cat("'goDivision' is required\n") |
91 q(status = 1) | 124 q(status = 1) |
92 } | 125 } |
93 if (is.null(opt$clusterheight)) { | 126 if (is.null(opt$clusterheight)) { |
94 cat("'clusterheight' is required\n") | 127 opt$clusterheight <- 0.25 |
95 q(status = 1) | 128 } |
129 if (is.null(opt$textsize)) { | |
130 opt$textsize <- 1.2 | |
96 } | 131 } |
97 if (is.null(opt$largest)) { | 132 if (is.null(opt$largest)) { |
98 cat("'largest' is required\n") | 133 opt$largest <- 0.1 |
99 q(status = 1) | |
100 } | 134 } |
101 if (is.null(opt$smallest)) { | 135 if (is.null(opt$smallest)) { |
102 cat("'smallest' is required\n") | 136 opt$smallest <- 5 |
103 q(status = 1) | 137 } |
138 if (is.null(opt$absValue)) { | |
139 opt$absValue <- -log(0.05, 10) | |
140 } | |
141 if (is.null(opt$l1)) { | |
142 opt$l1 <- 0.1 | |
143 } | |
144 if (is.null(opt$l2)) { | |
145 opt$l2 <- 0.05 | |
146 } | |
147 if (is.null(opt$l3)) { | |
148 opt$l3 <- 0.01 | |
104 } | 149 } |
105 if (is.null(opt$pcut)) { | 150 if (is.null(opt$pcut)) { |
106 cat("'pcut' is required\n") | 151 opt$pcut <- 1e-2 |
107 q(status = 1) | |
108 } | 152 } |
109 if (is.null(opt$hcut)) { | 153 if (is.null(opt$hcut)) { |
110 cat("'hcut' is required\n") | 154 opt$hcut <- 0.9 |
111 q(status = 1) | |
112 } | 155 } |
113 | 156 |
114 source_local <- function(fname) { | 157 source_local <- function(fname) { |
115 # argv <- commandArgs(trailingOnly = FALSE) | 158 # argv <- commandArgs(trailingOnly = FALSE) |
116 # base_dir <- dirname(substring(argv[grep("--file = ", argv)], 8)) | 159 # base_dir <- dirname(substring(argv[grep("--file = ", argv)], 8)) |
117 base_dir <- opt$scriptdir | 160 base_dir <- scriptdir |
118 source(paste(base_dir, fname, sep = "/")) | 161 source(paste(base_dir, fname, sep = "/")) |
119 } | 162 } |
120 | 163 |
121 source_local("gomwu.functions.R") | 164 source_local("gomwu.functions.R") |
122 | 165 |
134 # It might take a few minutes for MF and BP. Do not rerun it if you just want | 177 # It might take a few minutes for MF and BP. Do not rerun it if you just want |
135 # to replot the data with different cutoffs, go straight to gomwuPlot. If you | 178 # to replot the data with different cutoffs, go straight to gomwuPlot. If you |
136 # change any of the numeric values below, delete the files that were generated | 179 # change any of the numeric values below, delete the files that were generated |
137 # in previos runs first. | 180 # in previos runs first. |
138 | 181 |
139 gomwuStats(opt$input, opt$goDatabase, opt$goAnnotations, opt$goDivision, opt$scriptdir, | 182 gomwuStats(opt$input, opt$goDatabase, opt$goAnnotations, opt$goDivision, scriptdir, |
140 # replace with full path to perl executable if it is not in your system's PATH already | 183 # replace with full path to perl executable if it is not in your system's PATH already |
141 perlPath = "perl", | 184 perlPath = "perl", |
142 # a GO category will not be considered if it contains more than this fraction of the total number of genes | 185 # a GO category will not be considered if it contains more than this fraction of the total number of genes |
143 largest = opt$largest, | 186 largest = opt$largest, |
144 smallest = opt$smallest, # a GO category should contain at least this many genes to be considered. | 187 smallest = opt$smallest, # a GO category should contain at least this many genes to be considered. |
154 | 197 |
155 | 198 |
156 # ----------- Plotting results | 199 # ----------- Plotting results |
157 | 200 |
158 # change this to a pdf output | 201 # change this to a pdf output |
159 pdf(paste0(dir,"/","Rplots.pdf")) | 202 pdf(paste0(dir, "/", "Rplots.pdf"), width = 7, height = 7) |
203 # png(paste0(dir,"/","Rplots.png"),res=100) | |
160 | 204 |
161 results <- gomwuPlot(opt$input, opt$goAnnotations, opt$goDivision, | 205 results <- gomwuPlot(opt$input, opt$goAnnotations, opt$goDivision, |
162 # genes with the measure value exceeding this will be counted as "good genes". | 206 # genes with the measure value exceeding this will be counted as "good genes". |
163 # This setting is for signed log-pvalues. Specify absValue=0.001 if you are doing | 207 # This setting is for signed log-pvalues. Specify absValue=0.001 if you are doing |
164 # Fisher's exact test for standard GO enrichment or analyzing a WGCNA module (all non-zero genes = "good genes"). | 208 # Fisher's exact test for standard GO enrichment or analyzing a WGCNA module (all non-zero genes = "good genes"). |
165 absValue = -log(0.05, 10), | 209 absValue = opt$absValue, |
166 # absValue = 1, # un-remark this if you are using log2-fold changes | 210 # absValue = 1, # un-remark this if you are using log2-fold changes |
167 # FDR threshold for plotting. Specify level1=1 to plot all GO categories containing genes exceeding the absValue. | 211 # FDR threshold for plotting. Specify level1=1 to plot all GO categories containing genes exceeding the absValue. |
168 level1 = 0.1, | 212 level1 = opt$l1, |
169 level2 = 0.05, # FDR cutoff to print in regular (not italic) font. | 213 level2 = opt$l2, # FDR cutoff to print in regular (not italic) font. |
170 level3 = 0.01, # FDR cutoff to print in large bold font. | 214 level3 = opt$l3, # FDR cutoff to print in large bold font. |
171 # decrease to fit more on one page, or increase (after rescaling the plot so the tree fits the text) | 215 # decrease to fit more on one page, or increase (after rescaling the plot so the tree fits the text) |
172 # for better "word cloud" effect | 216 # for better "word cloud" effect |
173 txtsize = 1.2, | 217 txtsize = opt$textsize, |
174 treeHeight = 0.5, # height of the hierarchical clustering tree | 218 treeHeight = 0.5, # height of the hierarchical clustering tree |
175 # colors = c("dodgerblue2","firebrick1","skyblue2","lightcoral") | 219 # colors = c("dodgerblue2","firebrick1","skyblue2","lightcoral") |
176 # these are default colors, uncomment and change if needed | 220 # these are default colors, uncomment and change if needed |
177 ) | 221 ) |
222 dev.off() | |
178 # manually rescale the plot so the tree matches the text | 223 # manually rescale the plot so the tree matches the text |
179 # if there are too many categories displayed, try make it more stringent with level1 = 0.05,level2=0.01,level3=0.001. | 224 # if there are too many categories displayed, try make it more stringent with level1 = 0.05,level2=0.01,level3=0.001. |
180 | 225 |
181 # text representation of results, with actual adjusted p-values | 226 # text representation of results, with actual adjusted p-values |
182 write.table(results[[1]], paste0(dir, "/", "results.tsv"), sep = "\t") | 227 write.table(results[[1]], paste0(dir, "/", "results.tsv"), sep = "\t") |
221 if (bestrr$pval[best] <= pcut) { | 266 if (bestrr$pval[best] <= pcut) { |
222 annots <- c(annots, sub("\\d+\\/\\d+ ", "", row.names(bestrr)[best])) | 267 annots <- c(annots, sub("\\d+\\/\\d+ ", "", row.names(bestrr)[best])) |
223 } | 268 } |
224 } | 269 } |
225 | 270 |
226 mwus <- read.table(paste0(dir,"/",paste("MWU", opt$goDivision, name, sep = "_"), ".", ext), header = T) | 271 mwus <- read.table(paste0(dir, "/", paste("MWU", opt$goDivision, name, sep = "_"), ".", ext), header = T) |
227 best_GOs <- mwus[mwus$name %in% annots, ] | 272 best_GOs <- mwus[mwus$name %in% annots, ] |
228 write.table(best_GOs, paste0(dir, "/","best_go.tsv"), sep = "\t") | 273 write.table(best_GOs, paste0(dir, "/", "best_go.tsv"), sep = "\t", row.names = FALSE) |
229 dev.off() |